diff --git a/.github/actions/determine-tags/action.yml b/.github/actions/determine-tags/action.yml index 9a01cd6a4b..757a61a3bd 100644 --- a/.github/actions/determine-tags/action.yml +++ b/.github/actions/determine-tags/action.yml @@ -60,7 +60,7 @@ runs: # Fetch the latest release tag for release events if [ "$EVENT_NAME" == "release" ]; then RELEASE=$BRANCH - NORMALIZED_RELEASE=$(if echo "$RELEASE" | grep -Eq '^v?[0-9]+\.[0-9]+\.[0-9]$'; then echo "$RELEASE" | sed 's/^v//'; else echo "$RELEASE"; fi) + NORMALIZED_RELEASE=$(if echo "$RELEASE" | grep -Eq '^v?[0-9]+\.[0-9]+\.[0-9]+$'; then echo "$RELEASE" | sed 's/^v//'; else echo "$RELEASE"; fi) echo "Normalized release: $NORMALIZED_RELEASE" LATEST_RELEASE=$(curl -s -H "Authorization: Bearer $TOKEN" \ diff --git a/.github/workflows/build-inference-exp.yml b/.github/workflows/docker.inference-exp.yml similarity index 98% rename from .github/workflows/build-inference-exp.yml rename to .github/workflows/docker.inference-exp.yml index a802e75625..b8f549299f 100644 --- a/.github/workflows/build-inference-exp.yml +++ b/.github/workflows/docker.inference-exp.yml @@ -55,6 +55,7 @@ jobs: echo "base-tag=${BASE_TAG}" >> $GITHUB_OUTPUT build: + name: ${{ matrix.dockerfile }}:${{ matrix.platform }} needs: determine-tags runs-on: ubuntu-latest timeout-minutes: 120 diff --git a/.github/workflows/publish.pypi.inference_exp.yml b/.github/workflows/publish.pypi.inference_exp.yml new file mode 100644 index 0000000000..4a9772cbd0 --- /dev/null +++ b/.github/workflows/publish.pypi.inference_exp.yml @@ -0,0 +1,70 @@ +name: Publish Inference Experimental Wheels to PyPi +on: + release: + types: [created] + workflow_dispatch: + inputs: + publish: + description: "Actually publish the package to PyPI" + required: false + default: false + type: boolean + pre_release: + description: "Mark as pre-release" + required: false + default: false + type: boolean + +permissions: + contents: read + id-token: write + +jobs: + build: + name: ${{ github.event_name == 'release' && 'Release publish' || (github.event.inputs.publish == 'true' && (github.event.inputs.pre_release == 'true' && 'Manual publish (pre-release)' || 'Manual publish (rejected - non-prerelease)') || 'Manual build only') }} + runs-on: + labels: depot-ubuntu-22.04-small + group: public-depot + timeout-minutes: 20 + strategy: + matrix: + python-version: ["3.12"] + steps: + - name: 🛎️ Checkout + uses: actions/checkout@v4 + - name: 🐍 Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + check-latest: true + - name: 📦 Install dependencies + working-directory: inference_experimental + run: | + python -m pip install --upgrade pip + python -m pip install uv + - name: 🏷️ Modify version for pre-release + if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.pre_release == 'true' }} + working-directory: inference_experimental + run: | + CURRENT_VERSION=$(grep -m 1 '^version = ' pyproject.toml | sed 's/version = "\(.*\)"/\1/') + echo "Current version: $CURRENT_VERSION" + + if [[ $CURRENT_VERSION =~ (a|b|rc)[0-9]+$ ]]; then + echo "Version already has pre-release suffix, keeping as is" + else + TIMESTAMP=$(date +%Y%m%d%H%M%S) + NEW_VERSION="${CURRENT_VERSION}rc${TIMESTAMP}" + echo "New pre-release version: $NEW_VERSION" + sed -i.bak "s/^version = \"${CURRENT_VERSION}\"/version = \"${NEW_VERSION}\"/" pyproject.toml + rm pyproject.toml.bak + fi + - name: 🔨 Build package + working-directory: inference_experimental + run: | + python -m 
uv build + - name: 🚀 Publish to PyPI + if: ${{ github.event_name == 'release' || (github.event_name == 'workflow_dispatch' && github.event.inputs.publish == 'true') }} + uses: pypa/gh-action-pypi-publish@release/v1 + with: + packages-dir: inference_experimental/dist/ + skip-existing: true diff --git a/.release/pypi/inference.core.setup.py b/.release/pypi/inference.core.setup.py index 03622c823e..0f3588854c 100644 --- a/.release/pypi/inference.core.setup.py +++ b/.release/pypi/inference.core.setup.py @@ -73,7 +73,6 @@ def read_requirements(path): "hosted": read_requirements("requirements/requirements.hosted.txt"), "http": read_requirements("requirements/requirements.http.txt"), "sam": read_requirements("requirements/requirements.sam.txt"), - "waf": read_requirements("requirements/requirements.waf.txt"), "yolo-world": read_requirements("requirements/requirements.yolo_world.txt"), "transformers": read_requirements("requirements/requirements.transformers.txt"), }, diff --git a/.release/pypi/inference.cpu.setup.py b/.release/pypi/inference.cpu.setup.py index 58d469d83f..b7f75cda01 100644 --- a/.release/pypi/inference.cpu.setup.py +++ b/.release/pypi/inference.cpu.setup.py @@ -72,7 +72,6 @@ def read_requirements(path): "hosted": read_requirements("requirements/requirements.hosted.txt"), "http": read_requirements("requirements/requirements.http.txt"), "sam": read_requirements("requirements/requirements.sam.txt"), - "waf": read_requirements("requirements/requirements.waf.txt"), "yolo-world": read_requirements("requirements/requirements.yolo_world.txt"), "transformers": read_requirements("requirements/requirements.transformers.txt"), }, diff --git a/.release/pypi/inference.gpu.setup.py b/.release/pypi/inference.gpu.setup.py index 3c5954d8a1..9f725aa943 100644 --- a/.release/pypi/inference.gpu.setup.py +++ b/.release/pypi/inference.gpu.setup.py @@ -72,7 +72,6 @@ def read_requirements(path): "hosted": read_requirements("requirements/requirements.hosted.txt"), "http": read_requirements("requirements/requirements.http.txt"), "sam": read_requirements("requirements/requirements.sam.txt"), - "waf": read_requirements("requirements/requirements.waf.txt"), "yolo-world": read_requirements("requirements/requirements.yolo_world.txt"), "transformers": read_requirements("requirements/requirements.transformers.txt"), }, diff --git a/.release/pypi/inference.setup.py b/.release/pypi/inference.setup.py index f1ec61c044..ea00d55848 100644 --- a/.release/pypi/inference.setup.py +++ b/.release/pypi/inference.setup.py @@ -72,7 +72,6 @@ def read_requirements(path): "hosted": read_requirements("requirements/requirements.hosted.txt"), "http": read_requirements("requirements/requirements.http.txt"), "sam": read_requirements("requirements/requirements.sam.txt"), - "waf": read_requirements("requirements/requirements.waf.txt"), "yolo-world": read_requirements("requirements/requirements.yolo_world.txt"), "transformers": read_requirements("requirements/requirements.transformers.txt"), }, diff --git a/docker/dockerfiles/Dockerfile.onnx.cpu b/docker/dockerfiles/Dockerfile.onnx.cpu index 722849868d..00b56fc0c8 100644 --- a/docker/dockerfiles/Dockerfile.onnx.cpu +++ b/docker/dockerfiles/Dockerfile.onnx.cpu @@ -19,7 +19,6 @@ COPY requirements/requirements.sam.txt \ requirements/requirements.cpu.txt \ requirements/requirements.vino.txt \ requirements/requirements.http.txt \ - requirements/requirements.waf.txt \ requirements/requirements.gaze.txt \ requirements/requirements.doctr.txt \ requirements/requirements.groundingdino.txt \ @@ 
-35,7 +34,6 @@ RUN pip3 install \ -r requirements.clip.txt \ -r requirements.cpu.txt \ -r requirements.http.txt \ - -r requirements.waf.txt \ -r requirements.gaze.txt \ -r requirements.doctr.txt \ -r requirements.groundingdino.txt \ diff --git a/docker/dockerfiles/Dockerfile.onnx.cpu.dev b/docker/dockerfiles/Dockerfile.onnx.cpu.dev index f1795bf0a1..71c068209b 100644 --- a/docker/dockerfiles/Dockerfile.onnx.cpu.dev +++ b/docker/dockerfiles/Dockerfile.onnx.cpu.dev @@ -20,7 +20,6 @@ COPY requirements/requirements.sam.txt \ requirements/requirements.cpu.txt \ requirements/requirements.vino.txt \ requirements/requirements.http.txt \ - requirements/requirements.waf.txt \ requirements/requirements.gaze.txt \ requirements/requirements.doctr.txt \ requirements/requirements.groundingdino.txt \ @@ -36,7 +35,6 @@ RUN pip3 install \ -r requirements.clip.txt \ -r requirements.cpu.txt \ -r requirements.http.txt \ - -r requirements.waf.txt \ -r requirements.gaze.txt \ -r requirements.doctr.txt \ -r requirements.groundingdino.txt \ diff --git a/docker/dockerfiles/Dockerfile.onnx.cpu.parallel b/docker/dockerfiles/Dockerfile.onnx.cpu.parallel index ba88d47c92..6f1b31003f 100644 --- a/docker/dockerfiles/Dockerfile.onnx.cpu.parallel +++ b/docker/dockerfiles/Dockerfile.onnx.cpu.parallel @@ -20,7 +20,6 @@ COPY requirements/requirements.sam.txt \ requirements/requirements.clip.txt \ requirements/requirements.cpu.txt \ requirements/requirements.http.txt \ - requirements/requirements.waf.txt \ requirements/requirements.gaze.txt \ requirements/requirements.doctr.txt \ requirements/requirements.parallel.txt \ @@ -39,7 +38,6 @@ RUN pip3 install \ -r requirements.clip.txt \ -r requirements.cpu.txt \ -r requirements.http.txt \ - -r requirements.waf.txt \ -r requirements.gaze.txt \ -r requirements.doctr.txt \ -r requirements.parallel.txt \ diff --git a/docker/dockerfiles/Dockerfile.onnx.cpu.slim b/docker/dockerfiles/Dockerfile.onnx.cpu.slim index 7dbcc308f6..f5dcb32f94 100644 --- a/docker/dockerfiles/Dockerfile.onnx.cpu.slim +++ b/docker/dockerfiles/Dockerfile.onnx.cpu.slim @@ -18,7 +18,6 @@ RUN apt update -y && apt install -y \ COPY requirements/requirements.cpu.txt \ requirements/requirements.http.txt \ - requirements/requirements.waf.txt \ requirements/_requirements.txt \ requirements/requirements.vino.txt \ requirements/requirements.cli.txt \ @@ -32,7 +31,6 @@ RUN pip3 install \ -r _requirements.txt \ -r requirements.cpu.txt \ -r requirements.http.txt \ - -r requirements.waf.txt \ -r requirements.cli.txt \ -r requirements.sdk.http.txt \ "setuptools<=75.5.0" \ diff --git a/docker/dockerfiles/Dockerfile.onnx.gpu b/docker/dockerfiles/Dockerfile.onnx.gpu index 32783f9992..03fe73e756 100644 --- a/docker/dockerfiles/Dockerfile.onnx.gpu +++ b/docker/dockerfiles/Dockerfile.onnx.gpu @@ -19,7 +19,6 @@ COPY requirements/requirements.sam.txt \ requirements/requirements.clip.txt \ requirements/requirements.http.txt \ requirements/requirements.gpu.txt \ - requirements/requirements.waf.txt \ requirements/requirements.gaze.txt \ requirements/requirements.doctr.txt \ requirements/requirements.groundingdino.txt \ @@ -36,7 +35,6 @@ RUN python3 -m pip install \ -r requirements.clip.txt \ -r requirements.http.txt \ -r requirements.gpu.txt \ - -r requirements.waf.txt \ -r requirements.gaze.txt \ -r requirements.groundingdino.txt \ -r requirements.doctr.txt \ diff --git a/docker/dockerfiles/Dockerfile.onnx.gpu.dev b/docker/dockerfiles/Dockerfile.onnx.gpu.dev index db67ecdb65..9842fea480 100644 --- 
a/docker/dockerfiles/Dockerfile.onnx.gpu.dev +++ b/docker/dockerfiles/Dockerfile.onnx.gpu.dev @@ -20,7 +20,6 @@ COPY requirements/requirements.sam.txt \ requirements/requirements.clip.txt \ requirements/requirements.http.txt \ requirements/requirements.gpu.txt \ - requirements/requirements.waf.txt \ requirements/requirements.gaze.txt \ requirements/requirements.doctr.txt \ requirements/requirements.groundingdino.txt \ @@ -39,7 +38,6 @@ RUN python3 -m pip install \ -r requirements.clip.txt \ -r requirements.http.txt \ -r requirements.gpu.txt \ - -r requirements.waf.txt \ -r requirements.gaze.txt \ -r requirements.groundingdino.txt \ -r requirements.doctr.txt \ diff --git a/docker/dockerfiles/Dockerfile.onnx.gpu.parallel b/docker/dockerfiles/Dockerfile.onnx.gpu.parallel index 5e39088fdb..2fc0884bbd 100644 --- a/docker/dockerfiles/Dockerfile.onnx.gpu.parallel +++ b/docker/dockerfiles/Dockerfile.onnx.gpu.parallel @@ -16,7 +16,6 @@ COPY requirements/requirements.sam.txt \ requirements/requirements.clip.txt \ requirements/requirements.http.txt \ requirements/requirements.gpu.txt \ - requirements/requirements.waf.txt \ requirements/requirements.gaze.txt \ requirements/requirements.parallel.txt \ requirements/_requirements.txt \ @@ -28,7 +27,6 @@ RUN pip3 install --upgrade pip && pip3 install \ -r requirements.clip.txt \ -r requirements.http.txt \ -r requirements.gpu.txt \ - -r requirements.waf.txt \ -r requirements.gaze.txt \ -r requirements.parallel.txt \ "setuptools<=75.5.0" \ diff --git a/docker/dockerfiles/Dockerfile.onnx.gpu.slim b/docker/dockerfiles/Dockerfile.onnx.gpu.slim index 6d783a9e50..318fa97ed1 100644 --- a/docker/dockerfiles/Dockerfile.onnx.gpu.slim +++ b/docker/dockerfiles/Dockerfile.onnx.gpu.slim @@ -15,7 +15,6 @@ RUN rm -rf /var/lib/apt/lists/* && apt-get clean && apt-get update -y && DEBIAN_ COPY requirements/requirements.http.txt \ requirements/requirements.gpu.txt \ - requirements/requirements.waf.txt \ requirements/_requirements.txt \ requirements/requirements.cli.txt \ requirements/requirements.sdk.http.txt \ @@ -25,7 +24,6 @@ RUN pip3 install --upgrade pip && pip3 install \ -r _requirements.txt \ -r requirements.http.txt \ -r requirements.gpu.txt \ - -r requirements.waf.txt \ -r requirements.cli.txt \ -r requirements.sdk.http.txt \ "setuptools<=75.5.0" \ diff --git a/docker/dockerfiles/Dockerfile.onnx.trt b/docker/dockerfiles/Dockerfile.onnx.trt index d8785505fb..84a5542078 100644 --- a/docker/dockerfiles/Dockerfile.onnx.trt +++ b/docker/dockerfiles/Dockerfile.onnx.trt @@ -14,7 +14,6 @@ RUN apt-get update -y && apt-get install -y \ COPY requirements/requirements.sam.txt \ requirements/requirements.clip.txt \ requirements/requirements.http.txt \ - requirements/requirements.waf.txt \ requirements/requirements.gpu.txt \ requirements/requirements.gaze.txt \ requirements/requirements.doctr.txt \ @@ -28,7 +27,6 @@ RUN pip install --upgrade pip setuptools && pip install \ -r requirements.sam.txt \ -r requirements.clip.txt \ -r requirements.http.txt \ - -r requirements.waf.txt \ -r requirements.gpu.txt \ -r requirements.gaze.txt \ -r requirements.doctr.txt \ diff --git a/docs/download.md b/docs/download.md new file mode 100644 index 0000000000..c100918266 --- /dev/null +++ b/docs/download.md @@ -0,0 +1,92 @@ +# Downloading Roboflow Inference + +
+ Thanks for Downloading Roboflow Inference!
+
+ Your download should start automatically. If it doesn't, click here to download manually.
+ + + + \ No newline at end of file diff --git a/docs/images/macos-icon.svg b/docs/images/macos-icon.svg new file mode 100644 index 0000000000..1331a6b65d --- /dev/null +++ b/docs/images/macos-icon.svg @@ -0,0 +1,3 @@ + + + \ No newline at end of file diff --git a/docs/images/windows-icon.svg b/docs/images/windows-icon.svg new file mode 100644 index 0000000000..1b033ff6b0 --- /dev/null +++ b/docs/images/windows-icon.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/install/index.md b/docs/install/index.md index 2c34793e1d..1db8e82660 100644 --- a/docs/install/index.md +++ b/docs/install/index.md @@ -2,19 +2,37 @@ You can now run Roboflow Inference Server on your Windows or macOS machine with our native desktop applications! This is the quickest and most effortless way to get up and running. -Simply download the latest installer for your operating system. You can find these attached to our **latest release on GitHub**. - -➡️ **[View Latest Release and Download Installers on Github](https://github.com/roboflow/inference/releases)** +## Download for Latest Version + +
+
+ Windows: Download for Windows
+
+ macOS: Download for Mac
+
+ I need a previous release
+
+ +## Installation Instructions ### Windows (x86) - - [Download the latest installer](https://github.com/roboflow/inference/releases) and run it to install Roboflow Inference + - [Download the latest installer](https://github.com/roboflow/inference/releases/download/v{{ VERSION }}/inference-{{ VERSION }}-installer.exe) and run it to install Roboflow Inference - When the install is finished it will offer to launch the Inference server after the setup completes - To stop the inference server simply close the terminal window it opens - To start it again later, you can find Roboflow Inference in your Start Menu ### MacOS (Apple Silicon) - - [Download the Roboflow Inference DMG](https://github.com/roboflow/inference/releases) disk image - - Mount hte disk image by double clicking it + - [Download the Roboflow Inference DMG](https://github.com/roboflow/inference/releases/download/v{{ VERSION }}/Roboflow-Inference-{{ VERSION }}.dmg) + - Mount the DMG by double clicking it - Drag the Roboflow Inference App to the Application Folder - Go to your Application Folder and double click the Roboflow Inference App to start the server diff --git a/docs/scripts/macros.py b/docs/scripts/macros.py new file mode 100644 index 0000000000..23dfbc1b25 --- /dev/null +++ b/docs/scripts/macros.py @@ -0,0 +1,33 @@ +import sys +from pathlib import Path + +def define_env(env): + """Hook function to define macros for MkDocs.""" + + @env.macro + def get_version(): + """Read version from inference/core/version.py""" + # Find the root of the repository by iterating up parent directories + current_path = Path(__file__).resolve() + for parent in current_path.parents: + # Check if this directory contains the 'inference' subdirectory + if (parent / 'inference').is_dir(): + repo_root = parent + break + else: + raise FileNotFoundError("Could not find repository root with 'inference' directory") + + version_file_path = repo_root.joinpath('inference', 'core', 'version.py') + + try: + # Execute the version.py file and extract __version__ + namespace = {} + with open(version_file_path, 'r') as f: + exec(f.read(), namespace) + return namespace['__version__'] + except Exception as e: + print(f"Warning: Could not read version from {version_file_path}: {e}") + return "unknown" + + # Make VERSION available globally to all templates + env.variables['VERSION'] = get_version() \ No newline at end of file diff --git a/docs/styles.css b/docs/styles.css index 9ab43e5984..05deb2d046 100644 --- a/docs/styles.css +++ b/docs/styles.css @@ -109,4 +109,98 @@ .youtube { color: #EE0F0F; +} + +/* Download page styles */ +:root { + --download-border-radius: 0.5rem; + --download-padding: 0.875rem 1.5rem; + --download-gap: 0.625rem; + --download-font-size: 0.875rem; + --download-icon-size: 1.125rem; + --download-shadow: 0 0.25rem 0.75rem var(--md-primary-fg-color--transparent); +} + +.download-container { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(15rem, 1fr)); + gap: 1rem; + margin: 1rem 0; + max-width: 43.75rem; +} + +.download-card { + background: transparent; + padding: 1.5rem; + text-align: center; + display: flex; + flex-direction: column; + align-items: center; + gap: 1rem; +} + +.md-typeset .install-link { + margin: 0; + font-size: var(--download-font-size); +} + +.md-typeset .install-link a { + color: var(--md-default-fg-color--light); + text-decoration: underline; +} + +.md-typeset .install-link a:hover { + color: var(--md-primary-fg-color); +} + +.md-typeset .download-button { + display: inline-flex; + align-items: center; 
+ justify-content: center; + gap: var(--download-gap); + background: transparent; + color: var(--md-primary-fg-color); + border: 0.094rem solid var(--md-primary-fg-color); + padding: var(--download-padding); + border-radius: var(--download-border-radius); + text-decoration: none; + transition: all 0.2s ease; + font-weight: 500; + font-size: var(--download-font-size); + min-width: 12.5rem; + white-space: nowrap; +} + +.md-typeset .download-button:hover { + background: var(--md-primary-fg-color); + color: white; + transform: translateY(-0.063rem); + box-shadow: var(--download-shadow); +} + +.download-button img { + width: var(--download-icon-size); + height: var(--download-icon-size); + flex-shrink: 0; + opacity: 0.8; +} + +.download-button:hover img { + opacity: 1; +} + +@media (max-width: 48rem) { + .download-container { + grid-template-columns: 1fr; + max-width: 100%; + } + + .download-card { + padding: 1.25rem; + } + + .md-typeset .download-button { + min-width: 11.25rem; + padding: 0.75rem 1.25rem; + } } \ No newline at end of file diff --git a/inference/core/cache/redis.py b/inference/core/cache/redis.py index 9eff594d9f..53d4ea5e9f 100644 --- a/inference/core/cache/redis.py +++ b/inference/core/cache/redis.py @@ -62,7 +62,6 @@ def _expire(self): This method runs in an infinite loop and sleeps for MEMORY_CACHE_EXPIRE_INTERVAL seconds between each iteration. """ while True: - logger.debug("Redis cleaner thread starts cleaning...") now = time.time() for k, v in copy(list(self.zexpires.items())): if v < now: @@ -71,7 +70,6 @@ def _expire(self): k[0], k[1] - tolerance_factor, k[1] + tolerance_factor ) del self.zexpires[k] - logger.debug("Redis cleaner finished task.") sleep_time = MEMORY_CACHE_EXPIRE_INTERVAL - (time.time() - now) time.sleep(max(sleep_time, 0)) diff --git a/inference/core/interfaces/http/http_api.py b/inference/core/interfaces/http/http_api.py index 68f7497156..169055ac4f 100644 --- a/inference/core/interfaces/http/http_api.py +++ b/inference/core/interfaces/http/http_api.py @@ -258,8 +258,6 @@ if LAMBDA: from inference.core.usage import trackUsage -if METLO_KEY: - from metlo.fastapi import ASGIMiddleware import time @@ -608,11 +606,6 @@ async def on_shutdown(): InferenceInstrumentator( app, model_manager=model_manager, endpoint="/metrics" ) - - if METLO_KEY: - app.add_middleware( - ASGIMiddleware, host="https://app.metlo.com", api_key=METLO_KEY - ) if LAMBDA: app.add_middleware(LambdaMiddleware) if GCP_SERVERLESS: diff --git a/inference/core/version.py b/inference/core/version.py index 05ab3389d1..805b7cf763 100644 --- a/inference/core/version.py +++ b/inference/core/version.py @@ -1,4 +1,4 @@ -__version__ = "0.51.10" +__version__ = "0.52.0" if __name__ == "__main__": diff --git a/inference/core/workflows/core_steps/models/foundation/openai/v2.py b/inference/core/workflows/core_steps/models/foundation/openai/v2.py index 2282817b72..a6fb50c0ba 100644 --- a/inference/core/workflows/core_steps/models/foundation/openai/v2.py +++ b/inference/core/workflows/core_steps/models/foundation/openai/v2.py @@ -55,7 +55,7 @@ LONG_DESCRIPTION = f""" -Ask a question to OpenAI's GPT-4 with Vision model. +Ask a question to OpenAI's GPT models with vision capabilities (including GPT-4o and GPT-5). 
You can specify arbitrary text prompts or predefined ones, the block supports the following types of prompt: @@ -168,6 +168,7 @@ class BlockManifest(WorkflowBlockManifest): "gpt-4.1-nano", "gpt-4o", "gpt-4o-mini", + "gpt-5", ], ] = Field( default="gpt-4o", diff --git a/inference/core/workflows/core_steps/models/foundation/openai/v3.py b/inference/core/workflows/core_steps/models/foundation/openai/v3.py index 68fcf46afb..8a122f2b53 100644 --- a/inference/core/workflows/core_steps/models/foundation/openai/v3.py +++ b/inference/core/workflows/core_steps/models/foundation/openai/v3.py @@ -8,10 +8,7 @@ from openai._types import NOT_GIVEN from pydantic import ConfigDict, Field, model_validator -from inference.core.env import ( - API_BASE_URL, - WORKFLOWS_REMOTE_EXECUTION_MAX_STEP_CONCURRENT_REQUESTS, -) +from inference.core.env import WORKFLOWS_REMOTE_EXECUTION_MAX_STEP_CONCURRENT_REQUESTS from inference.core.managers.base import ModelManager from inference.core.roboflow_api import post_to_roboflow_api from inference.core.utils.image_utils import encode_image_to_jpeg_bytes, load_image @@ -61,7 +58,7 @@ LONG_DESCRIPTION = f""" -Ask a question to OpenAI's GPT-4 with Vision model. +Ask a question to OpenAI's GPT models with vision capabilities (including GPT-5 and GPT-4o). You can specify arbitrary text prompts or predefined ones, the block supports the following types of prompt: @@ -94,7 +91,7 @@ class BlockManifest(WorkflowBlockManifest): json_schema_extra={ "name": "OpenAI", "version": "v3", - "short_description": "Run OpenAI's GPT-4 with vision capabilities.", + "short_description": "Run OpenAI's GPT models with vision capabilities.", "long_description": LONG_DESCRIPTION, "license": "Apache-2.0", "block_type": "model", @@ -177,13 +174,16 @@ class BlockManifest(WorkflowBlockManifest): "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", + "gpt-5", + "gpt-5-mini", + "gpt-5-nano", "o3", "o4-mini", ], ] = Field( - default="gpt-4o", + default="gpt-5", description="Model to be used", - examples=["gpt-4o", "$inputs.openai_model"], + examples=["gpt-5", "$inputs.openai_model"], ) image_detail: Union[ Selector(kind=[STRING_KIND]), Literal["auto", "high", "low"] @@ -394,7 +394,7 @@ def _execute_proxied_openai_request( if temperature is not None: payload["temperature"] = temperature - endpoint = f"apiproxy/openai" # Use relative endpoint + endpoint = "apiproxy/openai" # Use relative endpoint try: # Use the Roboflow API post function (this enures proper auth headers used based on invocation context) diff --git a/inference/core/workflows/execution_engine/v1/dynamic_blocks/block_scaffolding.py b/inference/core/workflows/execution_engine/v1/dynamic_blocks/block_scaffolding.py index 4c98ed38d0..3f3d3ab513 100644 --- a/inference/core/workflows/execution_engine/v1/dynamic_blocks/block_scaffolding.py +++ b/inference/core/workflows/execution_engine/v1/dynamic_blocks/block_scaffolding.py @@ -1,3 +1,4 @@ +import traceback import types from typing import List, Type @@ -58,7 +59,20 @@ def run(self, *args, **kwargs) -> BlockResult: "`ALLOW_CUSTOM_PYTHON_EXECUTION_IN_WORKFLOWS=True`", context="workflow_execution | step_execution | dynamic_step", ) - return run_function(self, *args, **kwargs) + try: + return run_function(self, *args, **kwargs) + except Exception as error: + tb = traceback.extract_tb(error.__traceback__) + if tb: + frame = tb[-1] + line_number = frame.lineno - len( + _get_python_code_imports(python_code).splitlines() + ) + function_name = frame.name + message = f"Error in line {line_number}, in {function_name}: 
{error.__class__.__name__}: {error}" + else: + message = f"{error.__class__.__name__}: {error}" + raise Exception(message) from error if python_code.init_function_code is not None and not hasattr( code_module, python_code.init_function_name @@ -94,10 +108,14 @@ def get_manifest(cls) -> Type[WorkflowBlockManifest]: ) +def _get_python_code_imports(python_code: PythonCode) -> str: + return "\n".join(IMPORTS_LINES) + "\n" + "\n".join(python_code.imports) + "\n\n" + + def create_dynamic_module( block_type_name: str, python_code: PythonCode, module_name: str ) -> types.ModuleType: - imports = "\n".join(IMPORTS_LINES) + "\n" + "\n".join(python_code.imports) + "\n\n" + imports = _get_python_code_imports(python_code) code = python_code.run_function_code if python_code.init_function_code: code += "\n\n" + python_code.init_function_code diff --git a/inference/usage_tracking/collector.py b/inference/usage_tracking/collector.py index 5a0e6801ba..80ddcb714c 100644 --- a/inference/usage_tracking/collector.py +++ b/inference/usage_tracking/collector.py @@ -570,6 +570,7 @@ def _extract_usage_params_from_func_kwargs( execution_duration: float, func: Callable[[Any], Any], category: Literal["model", "workflows", "request"], + exc: Optional[str], args: List[Any], kwargs: Dict[str, Any], ) -> Dict[str, Any]: @@ -581,6 +582,8 @@ def _extract_usage_params_from_func_kwargs( resource_details["dedicated_deployment_id"] = DEDICATED_DEPLOYMENT_ID if DEVICE_ID: resource_details["device_id"] = DEVICE_ID + if exc is not None: + resource_details["error"] = exc resource_id = "" # TODO: add requires_api_key, True if workflow definition comes from platform or model comes from workspace if category == "workflows": @@ -689,28 +692,51 @@ def sync_wrapper( usage_billable: bool = True, **kwargs: P.kwargs, ) -> T: - t1 = time.time() - res = func(*args, **kwargs) - t2 = time.time() - if GCP_SERVERLESS is True: - execution_duration = max(t2 - t1, 0.1) - else: - execution_duration = t2 - t1 - self.record_usage( - **self._extract_usage_params_from_func_kwargs( - usage_fps=usage_fps, - usage_api_key=usage_api_key, - usage_workflow_id=usage_workflow_id, - usage_workflow_preview=usage_workflow_preview, - usage_inference_test_run=usage_inference_test_run, - usage_billable=usage_billable, - execution_duration=execution_duration, - func=func, - category=category, - args=args, - kwargs=kwargs, + try: + t1 = time.time() + res = func(*args, **kwargs) + t2 = time.time() + if GCP_SERVERLESS is True: + execution_duration = max(t2 - t1, 0.1) + else: + execution_duration = t2 - t1 + self.record_usage( + **self._extract_usage_params_from_func_kwargs( + usage_fps=usage_fps, + usage_api_key=usage_api_key, + usage_workflow_id=usage_workflow_id, + usage_workflow_preview=usage_workflow_preview, + usage_inference_test_run=usage_inference_test_run, + usage_billable=usage_billable, + execution_duration=execution_duration, + func=func, + category=category, + exc=None, + args=args, + kwargs=kwargs, + ) ) - ) + except Exception as exc: + if GCP_SERVERLESS is True: + t2 = time.time() + execution_duration = max(t2 - t1, 0.1) + self.record_usage( + **self._extract_usage_params_from_func_kwargs( + usage_fps=usage_fps, + usage_api_key=usage_api_key, + usage_workflow_id=usage_workflow_id, + usage_workflow_preview=usage_workflow_preview, + usage_inference_test_run=usage_inference_test_run, + usage_billable=usage_billable, + execution_duration=execution_duration, + func=func, + category=category, + exc=str(exc), + args=args, + kwargs=kwargs, + ) + ) + raise return res 
@wraps(func) @@ -724,28 +750,51 @@ async def async_wrapper( usage_billable: bool = True, **kwargs: P.kwargs, ) -> T: - t1 = time.time() - res = await func(*args, **kwargs) - t2 = time.time() - if GCP_SERVERLESS is True: - execution_duration = max(t2 - t1, 0.1) - else: - execution_duration = t2 - t1 - await self.async_record_usage( - **self._extract_usage_params_from_func_kwargs( - usage_fps=usage_fps, - usage_api_key=usage_api_key, - usage_workflow_id=usage_workflow_id, - usage_workflow_preview=usage_workflow_preview, - usage_inference_test_run=usage_inference_test_run, - usage_billable=usage_billable, - execution_duration=execution_duration, - func=func, - category=category, - args=args, - kwargs=kwargs, + try: + t1 = time.time() + res = await func(*args, **kwargs) + t2 = time.time() + if GCP_SERVERLESS is True: + execution_duration = max(t2 - t1, 0.1) + else: + execution_duration = t2 - t1 + await self.async_record_usage( + **self._extract_usage_params_from_func_kwargs( + usage_fps=usage_fps, + usage_api_key=usage_api_key, + usage_workflow_id=usage_workflow_id, + usage_workflow_preview=usage_workflow_preview, + usage_inference_test_run=usage_inference_test_run, + usage_billable=usage_billable, + execution_duration=execution_duration, + func=func, + category=category, + exc=None, + args=args, + kwargs=kwargs, + ) ) - ) + except Exception as exc: + if GCP_SERVERLESS is True: + t2 = time.time() + execution_duration = max(t2 - t1, 0.1) + await self.async_record_usage( + **self._extract_usage_params_from_func_kwargs( + usage_fps=usage_fps, + usage_api_key=usage_api_key, + usage_workflow_id=usage_workflow_id, + usage_workflow_preview=usage_workflow_preview, + usage_inference_test_run=usage_inference_test_run, + usage_billable=usage_billable, + execution_duration=execution_duration, + func=func, + category=category, + exc=str(exc), + args=args, + kwargs=kwargs, + ) + ) + raise return res if asyncio.iscoroutinefunction(func): diff --git a/inference_experimental/inference_exp/models/auto_loaders/models_registry.py b/inference_experimental/inference_exp/models/auto_loaders/models_registry.py index b641d14a6b..a1dd317307 100644 --- a/inference_experimental/inference_exp/models/auto_loaders/models_registry.py +++ b/inference_experimental/inference_exp/models/auto_loaders/models_registry.py @@ -121,6 +121,14 @@ module_name="inference_exp.models.paligemma.paligemma_hf", class_name="PaliGemmaHF", ), + ("smolvlm-v2", VLM_TASK, BackendType.HF): LazyClass( + module_name="inference_exp.models.smolvlm.smolvlm_hf", + class_name="SmolVLMHF", + ), + ("qwen25vl", VLM_TASK, BackendType.HF): LazyClass( + module_name="inference_exp.models.qwen25vl.qwen25vl_hf", + class_name="Qwen25VLHF", + ), ("florence-2", VLM_TASK, BackendType.HF): LazyClass( module_name="inference_exp.models.florence2.florence2_hf", class_name="Florence2HF", @@ -145,6 +153,10 @@ module_name="inference_exp.models.rfdetr.rfdetr_object_detection_pytorch", class_name="RFDetrForObjectDetectionTorch", ), + ("moondream2", VLM_TASK, BackendType.HF): LazyClass( + module_name="inference_exp.models.moondream2.moondream2_hf", + class_name="MoonDream2HF", + ), } diff --git a/inference_experimental/inference_exp/models/florence2/florence2_hf.py b/inference_experimental/inference_exp/models/florence2/florence2_hf.py index bbd8cc92e2..357523694b 100644 --- a/inference_experimental/inference_exp/models/florence2/florence2_hf.py +++ b/inference_experimental/inference_exp/models/florence2/florence2_hf.py @@ -4,7 +4,7 @@ import cv2 import numpy as np import torch 
-from peft import LoraConfig, PeftModel +from peft import PeftModel from inference_exp import Detections, InstanceDetections from inference_exp.configuration import DEFAULT_DEVICE from inference_exp.entities import ImageDimensions diff --git a/inference_experimental/inference_exp/models/paligemma/paligemma_hf.py b/inference_experimental/inference_exp/models/paligemma/paligemma_hf.py index e586b3640a..0cf5d0d064 100644 --- a/inference_experimental/inference_exp/models/paligemma/paligemma_hf.py +++ b/inference_experimental/inference_exp/models/paligemma/paligemma_hf.py @@ -1,8 +1,11 @@ -from typing import List, Union +from typing import List, Union, Optional +import os import numpy as np import torch +from peft import PeftModel from inference_exp.configuration import DEFAULT_DEVICE +from inference_exp.entities import ColorFormat from transformers import AutoProcessor, PaliGemmaForConditionalGeneration @@ -15,14 +18,35 @@ def from_pretrained( device: torch.device = DEFAULT_DEVICE, **kwargs, ) -> "PaliGemmaHF": - # TODO: Add int4/int8 inference torch_dtype = torch.float16 if device.type == "cuda" else torch.float32 - model = PaliGemmaForConditionalGeneration.from_pretrained( - model_name_or_path, - torch_dtype=torch_dtype, - device_map=device, - ).eval() - processor = AutoProcessor.from_pretrained(model_name_or_path) + + adapter_config_path = os.path.join(model_name_or_path, "adapter_config.json") + if os.path.exists(adapter_config_path): + base_model_path = os.path.join(model_name_or_path, "base") + model = PaliGemmaForConditionalGeneration.from_pretrained( + base_model_path, + torch_dtype=torch_dtype, + trust_remote_code=True, + local_files_only=True, + ) + model = PeftModel.from_pretrained(model, model_name_or_path) + model.merge_and_unload() + model.to(device) + + processor = AutoProcessor.from_pretrained( + base_model_path, trust_remote_code=True, local_files_only=True + ) + else: + model = PaliGemmaForConditionalGeneration.from_pretrained( + model_name_or_path, + torch_dtype=torch_dtype, + device_map=device, + trust_remote_code=True, + local_files_only=True, + ).eval() + processor = AutoProcessor.from_pretrained( + model_name_or_path, trust_remote_code=True, local_files_only=True + ) return cls( model=model, processor=processor, device=device, torch_dtype=torch_dtype ) @@ -43,12 +67,15 @@ def prompt( self, images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]], prompt: str, + input_color_format: Optional[ColorFormat] = None, max_new_tokens: int = 400, do_sample: bool = False, skip_special_tokens: bool = True, **kwargs, ) -> List[str]: - inputs = self.pre_process_generation(images=images, prompt=prompt) + inputs = self.pre_process_generation( + images=images, prompt=prompt, input_color_format=input_color_format + ) generated_ids = self.generate( inputs=inputs, max_new_tokens=max_new_tokens, @@ -63,9 +90,31 @@ def pre_process_generation( self, images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]], prompt: str, + input_color_format: Optional[ColorFormat] = None, **kwargs, ) -> dict: - return self._processor(text=prompt, images=images, return_tensors="pt").to( + def _to_tensor(image: Union[np.ndarray, torch.Tensor]) -> torch.Tensor: + is_numpy = isinstance(image, np.ndarray) + if is_numpy: + tensor_image = torch.from_numpy(image.copy()).permute(2, 0, 1) + else: + tensor_image = image + if input_color_format == "bgr" or (is_numpy and input_color_format is None): + tensor_image = tensor_image[[2, 1, 0], :, :] + return tensor_image + + if 
isinstance(images, torch.Tensor) and images.ndim > 3: + image_list = [_to_tensor(img) for img in images] + elif not isinstance(images, list): + image_list = [_to_tensor(images)] + else: + image_list = [_to_tensor(img) for img in images] + + num_images = len(image_list) + + if isinstance(prompt, str) and num_images > 1: + prompt = [prompt] * num_images + return self._processor(text=prompt, images=image_list, return_tensors="pt").to( self._device ) diff --git a/inference_experimental/inference_exp/models/qwen25vl/__init__.py b/inference_experimental/inference_exp/models/qwen25vl/__init__.py new file mode 100644 index 0000000000..967e7accf8 --- /dev/null +++ b/inference_experimental/inference_exp/models/qwen25vl/__init__.py @@ -0,0 +1 @@ +# This file makes the qwen25vl directory a Python package diff --git a/inference_experimental/inference_exp/models/qwen25vl/qwen25vl_hf.py b/inference_experimental/inference_exp/models/qwen25vl/qwen25vl_hf.py new file mode 100644 index 0000000000..63287cf7e6 --- /dev/null +++ b/inference_experimental/inference_exp/models/qwen25vl/qwen25vl_hf.py @@ -0,0 +1,214 @@ +from typing import List, Union +import os + +import numpy as np +import torch +from peft import PeftModel +from inference_exp.configuration import DEFAULT_DEVICE +from inference_exp.entities import ColorFormat +from transformers import ( + AutoProcessor, + Qwen2_5_VLForConditionalGeneration, + Qwen2_5_VLConfig, + AutoModelForCausalLM, +) + +AutoModelForCausalLM.register( + config_class=Qwen2_5_VLConfig, model_class=Qwen2_5_VLForConditionalGeneration +) + + +class Qwen25VLHF: + @classmethod + def from_pretrained( + cls, + model_name_or_path: str, + device: torch.device = DEFAULT_DEVICE, + **kwargs, + ) -> "Qwen25VLHF": + torch_dtype = torch.bfloat16 if device.type == "cuda" else torch.float32 + + adapter_config_path = os.path.join(model_name_or_path, "adapter_config.json") + if os.path.exists(adapter_config_path): + base_model_path = os.path.join(model_name_or_path, "base") + model = Qwen2_5_VLForConditionalGeneration.from_pretrained( + base_model_path, + torch_dtype=torch_dtype, + trust_remote_code=True, + local_files_only=True, + ) + model = PeftModel.from_pretrained(model, model_name_or_path) + model.merge_and_unload() + model.to(device) + + processor = AutoProcessor.from_pretrained( + base_model_path, trust_remote_code=True, local_files_only=True + ) + else: + model = Qwen2_5_VLForConditionalGeneration.from_pretrained( + model_name_or_path, + torch_dtype=torch_dtype, + device_map=device, + trust_remote_code=True, + local_files_only=True, + ).eval() + processor = AutoProcessor.from_pretrained( + model_name_or_path, trust_remote_code=True, local_files_only=True + ) + return cls( + model=model, processor=processor, device=device, torch_dtype=torch_dtype + ) + + def __init__( + self, + model: Qwen2_5_VLForConditionalGeneration, + processor: AutoProcessor, + device: torch.device, + torch_dtype: torch.dtype, + ): + self._model = model + self._processor = processor + self._device = device + self._torch_dtype = torch_dtype + self.default_system_prompt = ( + "You are a Qwen2.5-VL model that can answer questions about any image." 
+ ) + + def prompt( + self, + images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]], + prompt: str = None, + input_color_format: ColorFormat = None, + max_new_tokens: int = 512, + do_sample: bool = False, + skip_special_tokens: bool = False, + **kwargs, + ) -> List[str]: + inputs = self.pre_process_generation( + images=images, prompt=prompt, input_color_format=input_color_format + ) + generated_ids = self.generate( + inputs=inputs, + max_new_tokens=max_new_tokens, + do_sample=do_sample, + ) + return self.post_process_generation( + generated_ids=generated_ids, + skip_special_tokens=skip_special_tokens, + ) + + def pre_process_generation( + self, + images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]], + prompt: str = None, + input_color_format: ColorFormat = None, + **kwargs, + ) -> dict: + def _to_tensor(image: Union[np.ndarray, torch.Tensor]) -> torch.Tensor: + is_numpy = isinstance(image, np.ndarray) + if is_numpy: + tensor_image = torch.from_numpy(image.copy()).permute(2, 0, 1) + else: + tensor_image = image + if input_color_format == "bgr" or (is_numpy and input_color_format is None): + tensor_image = tensor_image[[2, 1, 0], :, :] + return tensor_image + + if isinstance(images, torch.Tensor) and images.ndim > 3: + image_list = [_to_tensor(img) for img in images] + elif not isinstance(images, list): + image_list = [_to_tensor(images)] + else: + image_list = [_to_tensor(img) for img in images] + # Handle prompt and system prompt parsing logic from original implementation + if prompt is None: + prompt = "" + system_prompt = self.default_system_prompt + else: + split_prompt = prompt.split("") + if len(split_prompt) == 1: + prompt = split_prompt[0] + system_prompt = self.default_system_prompt + else: + prompt = split_prompt[0] + system_prompt = split_prompt[1] + + # Construct conversation following original implementation structure + conversation = [ + { + "role": "system", + "content": [{"type": "text", "text": system_prompt}], + }, + { + "role": "user", + "content": [ + {"type": "image"}, # Processor will handle the actual image + {"type": "text", "text": prompt}, + ], + }, + ] + + # Apply chat template + text_input = self._processor.apply_chat_template( + conversation, tokenize=False, add_generation_prompt=True + ) + + # Process inputs - processor will handle tensor/array inputs directly + model_inputs = self._processor( + text=text_input, + images=image_list, + return_tensors="pt", + padding=True, + ) + + # Move inputs to device + model_inputs = { + k: v.to(self._device) + for k, v in model_inputs.items() + if isinstance(v, torch.Tensor) + } + + return model_inputs + + def generate( + self, + inputs: dict, + max_new_tokens: int = 512, + do_sample: bool = False, + **kwargs, + ) -> torch.Tensor: + input_len = inputs["input_ids"].shape[-1] + + with torch.inference_mode(): + generation = self._model.generate( + **inputs, + max_new_tokens=max_new_tokens, + do_sample=do_sample, + pad_token_id=self._processor.tokenizer.pad_token_id, + eos_token_id=self._processor.tokenizer.eos_token_id, + bos_token_id=self._processor.tokenizer.bos_token_id, + ) + + # Return only the newly generated tokens + return generation[:, input_len:] + + def post_process_generation( + self, + generated_ids: torch.Tensor, + skip_special_tokens: bool = False, + **kwargs, + ) -> List[str]: + # Decode the generated tokens + decoded = self._processor.batch_decode( + generated_ids, + skip_special_tokens=skip_special_tokens, + ) + + # Apply the same post-processing as original 
implementation + result = [] + for text in decoded: + text = text.replace("assistant\n", "") + text = text.replace(" addCriterion\n", "") + result.append(text.strip()) + + return result diff --git a/inference_experimental/inference_exp/models/smolvlm/smolvlm_hf.py b/inference_experimental/inference_exp/models/smolvlm/smolvlm_hf.py index 017c78d9d8..2754b61895 100644 --- a/inference_experimental/inference_exp/models/smolvlm/smolvlm_hf.py +++ b/inference_experimental/inference_exp/models/smolvlm/smolvlm_hf.py @@ -1,10 +1,11 @@ from typing import List, Optional, Union +import os import numpy as np import torch +from peft import PeftModel from inference_exp.configuration import DEFAULT_DEVICE from inference_exp.entities import ColorFormat -from inference_exp.models.common.roboflow.pre_processing import images_to_pillow from transformers import AutoModelForImageTextToText, AutoProcessor @@ -18,14 +19,42 @@ def from_pretrained( **kwargs, ) -> "SmolVLMHF": torch_dtype = torch.float16 if device.type == "cuda" else torch.float32 - model = AutoModelForImageTextToText.from_pretrained( - model_name_or_path, - torch_dtype=torch_dtype, - device_map=device, - ).eval() - processor = AutoProcessor.from_pretrained( - model_name_or_path, padding_side="left" - ) + + adapter_config_path = os.path.join(model_name_or_path, "adapter_config.json") + if os.path.exists(adapter_config_path): + + base_model_path = os.path.join(model_name_or_path, "base") + model = AutoModelForImageTextToText.from_pretrained( + base_model_path, + torch_dtype=torch_dtype, + trust_remote_code=True, + local_files_only=True, + ) + model = PeftModel.from_pretrained(model, model_name_or_path) + model.merge_and_unload() + model.to(device) + + processor = AutoProcessor.from_pretrained( + base_model_path, + padding_side="left", + trust_remote_code=True, + local_files_only=True, + ) + else: + print("smolvlm_hf.from_pretrained", "no adapter_config.json") + model = AutoModelForImageTextToText.from_pretrained( + model_name_or_path, + torch_dtype=torch_dtype, + device_map=device, + trust_remote_code=True, + local_files_only=True, + ).eval() + processor = AutoProcessor.from_pretrained( + model_name_or_path, + padding_side="left", + trust_remote_code=True, + local_files_only=True, + ) return cls( model=model, processor=processor, device=device, torch_dtype=torch_dtype ) @@ -77,20 +106,48 @@ def pre_process_generation( input_color_format: Optional[ColorFormat] = None, **kwargs, ) -> dict: - messages = prepare_chat_messages( - images=images, - prompt=prompt, - images_to_single_prompt=images_to_single_prompt, - input_color_format=input_color_format, + def _to_tensor(image: Union[np.ndarray, torch.Tensor]) -> torch.Tensor: + is_numpy = isinstance(image, np.ndarray) + if is_numpy: + tensor_image = torch.from_numpy(image.copy()).permute(2, 0, 1) + else: + tensor_image = image + if input_color_format == "bgr" or (is_numpy and input_color_format is None): + tensor_image = tensor_image[[2, 1, 0], :, :] + return tensor_image + + if isinstance(images, torch.Tensor) and images.ndim > 3: + image_list = [_to_tensor(img) for img in images] + elif not isinstance(images, list): + image_list = [_to_tensor(images)] + else: + image_list = [_to_tensor(img) for img in images] + + if images_to_single_prompt: + content = [{"type": "image"}] * len(image_list) + content.append({"type": "text", "text": prompt}) + conversations = [[{"role": "user", "content": content}]] + else: + conversations = [] + for _ in image_list: + conversations.append( + [ + { + "role": "user", + 
"content": [ + {"type": "image"}, + {"type": "text", "text": prompt}, + ], + } + ] + ) + text_prompts = self._processor.apply_chat_template( + conversations, add_generation_prompt=True + ) + inputs = self._processor( + text=text_prompts, images=image_list, return_tensors="pt", padding=True ) - return self._processor.apply_chat_template( - messages, - add_generation_prompt=True, - tokenize=True, - return_dict=True, - return_tensors="pt", - padding=len(messages) > 1, - ).to(self._device, dtype=self._torch_dtype) + return inputs.to(self._device, dtype=self._torch_dtype) def generate( self, @@ -115,41 +172,3 @@ def post_process_generation( generated_ids, skip_special_tokens=skip_special_tokens ) return [result.strip() for result in decoded] - - -def prepare_chat_messages( - images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]], - prompt: str, - images_to_single_prompt: bool, - input_color_format: Optional[ColorFormat] = None, -) -> List[List[dict]]: - pillow_images, _ = images_to_pillow( - images=images, input_color_format=input_color_format, model_color_format="rgb" - ) - if images_to_single_prompt: - content = [] - for image in pillow_images: - content.append({"type": "image", "image": image}) - content.append({"type": "text", "text": prompt}) - return [ - [ - { - "role": "user", - "content": content, - }, - ] - ] - result = [] - for image in pillow_images: - result.append( - [ - { - "role": "user", - "content": [ - {"type": "image", "image": image}, - {"type": "text", "text": prompt}, - ], - }, - ] - ) - return result diff --git a/inference_experimental/tests/integration_tests/e2e/test_moondream2_e2e.py b/inference_experimental/tests/integration_tests/e2e/test_moondream2_e2e.py new file mode 100644 index 0000000000..dfe163725f --- /dev/null +++ b/inference_experimental/tests/integration_tests/e2e/test_moondream2_e2e.py @@ -0,0 +1,22 @@ +import numpy as np +import pytest +from inference_exp import AutoModel + + +@pytest.mark.e2e_model_inference +@pytest.mark.slow +def test_moondream2_model(dog_image_numpy: np.ndarray): + # GIVEN + model = AutoModel.from_pretrained("moondream2") + + # WHEN + answer = model.query(images=dog_image_numpy, question="What is in the image?") + + # THEN + assert isinstance(answer, list) + assert len(answer) == 1 + assert isinstance(answer[0], str) + assert ( + answer[0] + == "The image features a man carrying a beagle on his back, with the dog sitting on his shoulder." + ) diff --git a/inference_experimental/tests/integration_tests/e2e/test_paligemma_e2e.py b/inference_experimental/tests/integration_tests/e2e/test_paligemma_e2e.py new file mode 100644 index 0000000000..24975662b7 --- /dev/null +++ b/inference_experimental/tests/integration_tests/e2e/test_paligemma_e2e.py @@ -0,0 +1,33 @@ +import numpy as np +import pytest +from inference_exp import AutoModel + + +@pytest.mark.e2e_model_inference +def test_paligemma_base_model(dog_image_numpy: np.ndarray): + # GIVEN + model = AutoModel.from_pretrained("paligemma2-3b-pt-224") + + # WHEN + captions = model.prompt(images=dog_image_numpy, prompt="What is in the image?") + + # THEN + assert isinstance(captions, list) + assert len(captions) == 1 + assert isinstance(captions[0], str) + assert captions[0] == "Dog." 
+ + +@pytest.mark.e2e_model_inference +def test_paligemma_lora_model(dog_image_numpy: np.ndarray): + # GIVEN + model = AutoModel.from_pretrained("paligemma-lora-test") + + # WHEN + captions = model.prompt(images=dog_image_numpy, prompt="What is in the image?") + + # THEN + assert isinstance(captions, list) + assert len(captions) == 1 + assert isinstance(captions[0], str) + assert captions[0] == "Dog." diff --git a/inference_experimental/tests/integration_tests/e2e/test_qwen25vl_e2e.py b/inference_experimental/tests/integration_tests/e2e/test_qwen25vl_e2e.py new file mode 100644 index 0000000000..fca91db2ad --- /dev/null +++ b/inference_experimental/tests/integration_tests/e2e/test_qwen25vl_e2e.py @@ -0,0 +1,41 @@ +import numpy as np +import pytest +from inference_exp import AutoModel + + +@pytest.mark.e2e_model_inference +@pytest.mark.slow +def test_qwen25vl_base_model(dog_image_numpy: np.ndarray): + # GIVEN + model = AutoModel.from_pretrained("qwen25vl-7b") + + # WHEN + captions = model.prompt(images=dog_image_numpy, prompt="What is in the image?") + + # THEN + assert isinstance(captions, list) + assert len(captions) == 1 + assert isinstance(captions[0], str) + assert ( + captions[0] + == "The image shows a person carrying a Beagle dog on their shoulders. The dog appears to be happy, with its tongue out and looking upwards. The person is wearing a white shirt, a black cap, and a backpack. The background includes a street scene with buildings and a clear sky.<|im_end|>" + ) + + +@pytest.mark.e2e_model_inference +@pytest.mark.slow +def test_qwen25vl_lora_model(dog_image_numpy: np.ndarray): + # GIVEN + model = AutoModel.from_pretrained("qwen-lora-test") + + # WHEN + captions = model.prompt(images=dog_image_numpy, prompt="What is in the image?") + + # THEN + assert isinstance(captions, list) + assert len(captions) == 1 + assert isinstance(captions[0], str) + assert ( + captions[0] + == "The image shows a person carrying a Beagle dog on their shoulders. The dog appears to be happy, with its tongue out and looking upwards. The person is wearing a white shirt, a black cap, and a backpack. The background includes a street scene with buildings and a clear sky.<|im_end|>" + ) diff --git a/inference_experimental/tests/integration_tests/e2e/test_smolvlm_e2e.py b/inference_experimental/tests/integration_tests/e2e/test_smolvlm_e2e.py new file mode 100644 index 0000000000..fcfb761ad2 --- /dev/null +++ b/inference_experimental/tests/integration_tests/e2e/test_smolvlm_e2e.py @@ -0,0 +1,33 @@ +import numpy as np +import pytest +from inference_exp import AutoModel + + +@pytest.mark.e2e_model_inference +def test_smolvlm_base_model(dog_image_numpy: np.ndarray): + # GIVEN + model = AutoModel.from_pretrained("smolvlm-256m") + + # WHEN + captions = model.prompt(images=dog_image_numpy, prompt="What is in the image?") + + # THEN + assert isinstance(captions, list) + assert len(captions) == 1 + assert isinstance(captions[0], str) + assert captions[0] == "There is a person and a dog in the image." + + +@pytest.mark.e2e_model_inference +def test_smolvlm_lora_model(dog_image_numpy: np.ndarray): + # GIVEN + model = AutoModel.from_pretrained("smolvlm-lora-test") + + # WHEN + captions = model.prompt(images=dog_image_numpy, prompt="What is in the image?") + + # THEN + assert isinstance(captions, list) + assert len(captions) == 1 + assert isinstance(captions[0], str) + assert captions[0] == "There is a man in the image." 
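The PaliGemma, Qwen2.5-VL, and SmolVLM loaders changed above all follow the same adapter-aware flow: if the model package contains `adapter_config.json`, the base weights are read from a `base/` subdirectory, the LoRA adapter is attached with PEFT and merged; otherwise the package is loaded as a plain checkpoint. Below is a minimal standalone sketch of that pattern, not part of the diff; `AutoModelForImageTextToText` stands in for the model-specific classes, and the `base/` package layout is an assumption taken from the changes above.

```python
import os

import torch
from peft import PeftModel
from transformers import AutoModelForImageTextToText, AutoProcessor


def load_vlm_with_optional_lora(model_dir: str, device: torch.device):
    """Load an HF vision-language checkpoint, merging a LoRA adapter if present.

    Sketch of the loading pattern used by the PaliGemma/Qwen2.5-VL/SmolVLM
    classes in this PR: a fine-tuned package ships `adapter_config.json` plus
    base weights in a `base/` subdirectory; a plain checkpoint is loaded directly.
    """
    torch_dtype = torch.float16 if device.type == "cuda" else torch.float32
    adapter_config = os.path.join(model_dir, "adapter_config.json")

    if os.path.exists(adapter_config):
        # LoRA package: load the frozen base model, attach the adapter, then
        # merge the adapter weights so inference runs on a single model.
        base_path = os.path.join(model_dir, "base")
        model = AutoModelForImageTextToText.from_pretrained(
            base_path, torch_dtype=torch_dtype, local_files_only=True
        )
        model = PeftModel.from_pretrained(model, model_dir)
        model = model.merge_and_unload()
        model.to(device)
        processor = AutoProcessor.from_pretrained(base_path, local_files_only=True)
    else:
        # Plain checkpoint: load weights and processor straight from the package.
        model = AutoModelForImageTextToText.from_pretrained(
            model_dir,
            torch_dtype=torch_dtype,
            device_map=device,
            local_files_only=True,
        ).eval()
        processor = AutoProcessor.from_pretrained(model_dir, local_files_only=True)

    return model.eval(), processor
```

Usage mirrors the `from_pretrained` classmethods above: point it at an unzipped model package (base or LoRA fine-tune) and pass the returned model/processor pair to the class constructor.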
diff --git a/inference_experimental/tests/integration_tests/models/conftest.py b/inference_experimental/tests/integration_tests/models/conftest.py index 73651ac162..413636bcc5 100644 --- a/inference_experimental/tests/integration_tests/models/conftest.py +++ b/inference_experimental/tests/integration_tests/models/conftest.py @@ -23,6 +23,16 @@ FLORENCE2_LARGE_FT_URL = ( "https://storage.googleapis.com/roboflow-tests-assets/florence2/large-ft.zip" ) +QWEN25VL_3B_FT_URL = ( + "https://storage.googleapis.com/roboflow-tests-assets/qwen/qwen25vl-3b.zip" +) +PALIGEMMA_BASE_FT_URL = "https://storage.googleapis.com/roboflow-tests-assets/paligemma/paligemma2-3b-pt-224.zip" +SMOLVLM_BASE_FT_URL = ( + "https://storage.googleapis.com/roboflow-tests-assets/smolvlm/smolvlm-256m.zip" +) +MOONDREAM2_BASE_FT_URL = ( + "https://storage.googleapis.com/roboflow-tests-assets/moondream2/moondream2-2b.zip" +) OCR_TEST_IMAGE_PATH = os.path.join(ASSETS_DIR, "ocr_test_image.png") @@ -114,3 +124,63 @@ def florence2_large_ft_path() -> str: with zipfile.ZipFile(zip_path, "r") as zip_ref: zip_ref.extractall(package_dir) return unzipped_package_path + + +@pytest.fixture(scope="module") +def qwen25vl_3b_path() -> str: + package_dir = os.path.join(MODELS_DIR, "qwen25vl-3b") + unzipped_package_path = os.path.join(package_dir, "weights") + os.makedirs(package_dir, exist_ok=True) + zip_path = os.path.join(package_dir, "qwen25vl-3b.zip") + _download_if_not_exists(file_path=zip_path, url=QWEN25VL_3B_FT_URL) + lock_path = f"{unzipped_package_path}.lock" + with FileLock(lock_path, timeout=120): + if not os.path.exists(unzipped_package_path): + with zipfile.ZipFile(zip_path, "r") as zip_ref: + zip_ref.extractall(package_dir) + return unzipped_package_path + + +@pytest.fixture(scope="module") +def paligemma_3b_224_path() -> str: + package_dir = os.path.join(MODELS_DIR, "paligemma2-3b-pt-224") + unzipped_package_path = os.path.join(package_dir, "weights") + os.makedirs(package_dir, exist_ok=True) + zip_path = os.path.join(package_dir, "paligemma2-3b-pt-224.zip") + _download_if_not_exists(file_path=zip_path, url=PALIGEMMA_BASE_FT_URL) + lock_path = f"{unzipped_package_path}.lock" + with FileLock(lock_path, timeout=120): + if not os.path.exists(unzipped_package_path): + with zipfile.ZipFile(zip_path, "r") as zip_ref: + zip_ref.extractall(package_dir) + return unzipped_package_path + + +@pytest.fixture(scope="module") +def smolvlm_256m_path() -> str: + package_dir = os.path.join(MODELS_DIR, "smolvlm-256m") + unzipped_package_path = os.path.join(package_dir, "weights") + os.makedirs(package_dir, exist_ok=True) + zip_path = os.path.join(package_dir, "smolvlm-256m.zip") + _download_if_not_exists(file_path=zip_path, url=SMOLVLM_BASE_FT_URL) + lock_path = f"{unzipped_package_path}.lock" + with FileLock(lock_path, timeout=120): + if not os.path.exists(unzipped_package_path): + with zipfile.ZipFile(zip_path, "r") as zip_ref: + zip_ref.extractall(package_dir) + return unzipped_package_path + + +@pytest.fixture(scope="module") +def moondream2_path() -> str: + package_dir = os.path.join(MODELS_DIR, "moondream2") + unzipped_package_path = os.path.join(package_dir, "moondream2-2b") + os.makedirs(package_dir, exist_ok=True) + zip_path = os.path.join(package_dir, "moondream2-2b.zip") + _download_if_not_exists(file_path=zip_path, url=MOONDREAM2_BASE_FT_URL) + lock_path = f"{unzipped_package_path}.lock" + with FileLock(lock_path, timeout=120): + if not os.path.exists(unzipped_package_path): + with zipfile.ZipFile(zip_path, "r") as zip_ref: + 
zip_ref.extractall(package_dir) + return unzipped_package_path diff --git a/inference_experimental/tests/integration_tests/models/test_clip_predictions.py b/inference_experimental/tests/integration_tests/models/test_clip_predictions.py index ea15fe66d8..cca15cf306 100644 --- a/inference_experimental/tests/integration_tests/models/test_clip_predictions.py +++ b/inference_experimental/tests/integration_tests/models/test_clip_predictions.py @@ -1181,7 +1181,7 @@ def test_clip_onnx_image_prediction_for_numpy( # then assert tuple(embeddings.shape) == (1, 1024) - assert torch.allclose(embeddings, EXPECTED_DOG_IMAGE_EMBEDDING, atol=1e-4) + assert torch.allclose(embeddings, EXPECTED_DOG_IMAGE_EMBEDDING, atol=1e-3) @pytest.mark.slow @@ -1203,7 +1203,7 @@ def test_clip_onnx_image_prediction_for_torch_tensor( # then assert tuple(embeddings.shape) == (1, 1024) - assert torch.allclose(embeddings, EXPECTED_DOG_IMAGE_EMBEDDING, atol=1e-4) + assert torch.allclose(embeddings, EXPECTED_DOG_IMAGE_EMBEDDING, atol=1e-3) @pytest.mark.slow diff --git a/inference_experimental/tests/integration_tests/models/test_moondream2_predictions.py b/inference_experimental/tests/integration_tests/models/test_moondream2_predictions.py new file mode 100644 index 0000000000..cc109174b9 --- /dev/null +++ b/inference_experimental/tests/integration_tests/models/test_moondream2_predictions.py @@ -0,0 +1,84 @@ +import numpy as np +import pytest +import torch +from inference_exp.models.moondream2.moondream2_hf import MoonDream2HF, Points +from inference_exp import Detections + + +@pytest.fixture(scope="module") +def moondream2_model(moondream2_path: str) -> MoonDream2HF: + return MoonDream2HF.from_pretrained(moondream2_path) + + +@pytest.mark.slow +def test_detect(moondream2_model: MoonDream2HF, dog_image_numpy: np.ndarray): + # when + detections = moondream2_model.detect( + images=dog_image_numpy, classes=["dog", "person"] + ) + + # then + assert isinstance(detections, list) + assert len(detections) == 1 + assert isinstance(detections[0], Detections) + assert len(detections[0].xyxy) == 2 + assert torch.allclose( + detections[0].xyxy, + torch.tensor([[64, 253, 628, 925], [0, 358, 646, 1277]], dtype=torch.int32), + ) + assert torch.allclose( + detections[0].class_id, + torch.tensor([0, 1], dtype=torch.int32), + ) + + +@pytest.mark.slow +def test_caption(moondream2_model: MoonDream2HF, dog_image_numpy: np.ndarray): + # when + caption = moondream2_model.caption(images=dog_image_numpy) + + # then + assert isinstance(caption, list) + assert len(caption) == 1 + assert isinstance(caption[0], str) + assert ( + caption[0] + == "A person wearing a black baseball cap and a white t-shirt is carrying a beagle on their back. The beagle, with its light brown and white fur, is sitting comfortably on the person's shoulder, its tongue hanging out in a playful manner. The person is also wearing a black backpack with a white logo. The background features a cityscape with a tall building and a street, with a red car visible in the distance. The sky is a clear blue with a few clouds." + ) + + +@pytest.mark.slow +def test_query(moondream2_model: MoonDream2HF, dog_image_numpy: np.ndarray): + # when + answer = moondream2_model.query( + images=dog_image_numpy, question="What is in the image?" + ) + + # then + assert isinstance(answer, list) + assert len(answer) == 1 + assert isinstance(answer[0], str) + assert ( + answer[0] + == "The image features a man carrying a beagle on his back, with the dog sitting on his shoulder." 
+ ) + + +@pytest.mark.slow +def test_point(moondream2_model: MoonDream2HF, dog_image_numpy: np.ndarray): + # when + points = moondream2_model.point(images=dog_image_numpy, classes=["dog", "person"]) + + # then + assert isinstance(points, list) + assert len(points) == 1 + assert isinstance(points[0], Points) + assert len(points[0].xy) == 2 + assert torch.allclose( + points[0].xy, + torch.tensor([[367, 355], [323, 872]], dtype=torch.int32), + ) + assert torch.allclose( + points[0].class_id, + torch.tensor([0, 1], dtype=torch.int32), + ) diff --git a/inference_experimental/tests/integration_tests/models/test_paligemma_predictions.py b/inference_experimental/tests/integration_tests/models/test_paligemma_predictions.py new file mode 100644 index 0000000000..5274800f9b --- /dev/null +++ b/inference_experimental/tests/integration_tests/models/test_paligemma_predictions.py @@ -0,0 +1,29 @@ +import numpy as np +import pytest + +from inference_exp.models.paligemma.paligemma_hf import PaliGemmaHF + + +@pytest.fixture(scope="module") +def paligemma_model(paligemma_3b_224_path: str) -> PaliGemmaHF: + return PaliGemmaHF.from_pretrained(paligemma_3b_224_path) + + +@pytest.mark.slow +def test_prompt(paligemma_model: PaliGemmaHF, dog_image_numpy: np.ndarray): + # when + result = paligemma_model.prompt( + images=dog_image_numpy, prompt="What is in the image?" + ) + # then + assert result == ["Dog."] + + +@pytest.mark.slow +def test_prompt_dog_type(paligemma_model: PaliGemmaHF, dog_image_numpy: np.ndarray): + # when + result = paligemma_model.prompt( + images=dog_image_numpy, prompt="What type of dog is this?" + ) + # then + assert result == ["beagle"] diff --git a/inference_experimental/tests/integration_tests/models/test_paligemma_preprocessing.py b/inference_experimental/tests/integration_tests/models/test_paligemma_preprocessing.py new file mode 100644 index 0000000000..2115ecaff8 --- /dev/null +++ b/inference_experimental/tests/integration_tests/models/test_paligemma_preprocessing.py @@ -0,0 +1,132 @@ +import numpy as np +import pytest +import torch + +from inference_exp.models.paligemma.paligemma_hf import PaliGemmaHF + + +@pytest.fixture(scope="module") +def paligemma_model(paligemma_3b_224_path: str) -> PaliGemmaHF: + return PaliGemmaHF.from_pretrained(paligemma_3b_224_path) + + +def get_preprocessed_outputs( + paligemma_model: PaliGemmaHF, + dog_image_numpy: np.ndarray, + dog_image_torch: torch.Tensor, +): + prompt = "caption" + # Process single numpy image (BGR) + numpy_output = paligemma_model.pre_process_generation( + images=dog_image_numpy, prompt=prompt + ) + + # Process single torch tensor (RGB) + tensor_output = paligemma_model.pre_process_generation( + images=dog_image_torch, prompt=prompt + ) + + # Process list of numpy images + list_numpy_output = paligemma_model.pre_process_generation( + images=[dog_image_numpy, dog_image_numpy], prompt=prompt + ) + + # Process list of torch tensors + list_tensor_output = paligemma_model.pre_process_generation( + images=[dog_image_torch, dog_image_torch], prompt=prompt + ) + + # Process batched tensor + batched_tensor = torch.stack([dog_image_torch, dog_image_torch]) + batched_tensor_output = paligemma_model.pre_process_generation( + images=batched_tensor, prompt=prompt + ) + + return ( + numpy_output, + tensor_output, + list_numpy_output, + list_tensor_output, + batched_tensor_output, + ) + + +@pytest.mark.slow +def test_preprocessed_output_shapes( + paligemma_model: PaliGemmaHF, + dog_image_numpy: np.ndarray, + dog_image_torch: torch.Tensor, +): + # 
GIVEN + ( + numpy_output, + tensor_output, + list_numpy_output, + list_tensor_output, + batched_tensor_output, + ) = get_preprocessed_outputs(paligemma_model, dog_image_numpy, dog_image_torch) + + # THEN + # Check shapes for single image inputs + assert "pixel_values" in numpy_output and numpy_output["pixel_values"].shape[0] == 1 + assert ( + "pixel_values" in tensor_output and tensor_output["pixel_values"].shape[0] == 1 + ) + + # Check shapes for multi-image inputs + assert ( + "pixel_values" in list_numpy_output + and list_numpy_output["pixel_values"].shape[0] == 2 + ) + assert ( + "pixel_values" in list_tensor_output + and list_tensor_output["pixel_values"].shape[0] == 2 + ) + assert ( + "pixel_values" in batched_tensor_output + and batched_tensor_output["pixel_values"].shape[0] == 2 + ) + + +@pytest.mark.slow +def test_internal_consistency_of_preprocessed_inputs( + paligemma_model: PaliGemmaHF, + dog_image_numpy: np.ndarray, + dog_image_torch: torch.Tensor, +): + # GIVEN + ( + numpy_output, + tensor_output, + list_numpy_output, + list_tensor_output, + batched_tensor_output, + ) = get_preprocessed_outputs(paligemma_model, dog_image_numpy, dog_image_torch) + # The dog_image_numpy is BGR, dog_image_torch is RGB. + # The processor should handle the conversion, but let's compare RGB numpy to RGB tensor + prompt = "caption" + rgb_dog_image_numpy = dog_image_numpy[:, :, ::-1] + numpy_rgb_output = paligemma_model.pre_process_generation( + images=rgb_dog_image_numpy, prompt=prompt, input_color_format="rgb" + ) + + # THEN + # Compare single numpy (RGB) and single tensor (RGB) + assert torch.allclose( + numpy_rgb_output["pixel_values"], tensor_output["pixel_values"], atol=1e-2 + ) + assert torch.allclose( + numpy_rgb_output["input_ids"], tensor_output["input_ids"], atol=1e-2 + ) + + # Compare list of tensors and batched tensor + assert torch.allclose( + list_tensor_output["pixel_values"], + batched_tensor_output["pixel_values"], + atol=1e-2, + ) + assert torch.allclose( + list_tensor_output["input_ids"], + batched_tensor_output["input_ids"], + atol=1e-2, + ) diff --git a/inference_experimental/tests/integration_tests/models/test_qwen25vl_predictions.py b/inference_experimental/tests/integration_tests/models/test_qwen25vl_predictions.py new file mode 100644 index 0000000000..2f371b1d64 --- /dev/null +++ b/inference_experimental/tests/integration_tests/models/test_qwen25vl_predictions.py @@ -0,0 +1,20 @@ +import numpy as np +import pytest + +from inference_exp.models.qwen25vl.qwen25vl_hf import Qwen25VLHF + + +@pytest.fixture(scope="module") +def qwen_model(qwen25vl_3b_path: str) -> Qwen25VLHF: + return Qwen25VLHF.from_pretrained(qwen25vl_3b_path) + + +@pytest.mark.slow +def test_prompt(qwen_model: Qwen25VLHF, dog_image_numpy: np.ndarray): + # when + result = qwen_model.prompt(images=dog_image_numpy, prompt="What is in the image?") + # then + assert ( + result[0] + == "The image shows a person carrying a dog on their back. The dog appears to be a Beagle, with its tongue out and ears floppy. The person is wearing a white shirt and a black cap. They have a backpack on, which has a logo on it. 
The background includes a street scene with buildings and a clear blue sky.<|im_end|>" + ) diff --git a/inference_experimental/tests/integration_tests/models/test_qwen25vl_preprocessing.py b/inference_experimental/tests/integration_tests/models/test_qwen25vl_preprocessing.py new file mode 100644 index 0000000000..81408920c5 --- /dev/null +++ b/inference_experimental/tests/integration_tests/models/test_qwen25vl_preprocessing.py @@ -0,0 +1,136 @@ +import numpy as np +import pytest +import torch + +from inference_exp.models.qwen25vl.qwen25vl_hf import Qwen25VLHF + + +@pytest.fixture(scope="module") +def qwen_model(qwen25vl_3b_path: str) -> Qwen25VLHF: + return Qwen25VLHF.from_pretrained(qwen25vl_3b_path) + + +def get_preprocessed_outputs( + qwen_model: Qwen25VLHF, + dog_image_numpy: np.ndarray, + dog_image_torch: torch.Tensor, +): + prompt = "What is in the image?" + # Process single numpy image (BGR) + numpy_output = qwen_model.pre_process_generation( + images=dog_image_numpy, prompt=prompt + ) + + # Process single torch tensor (RGB) + tensor_output = qwen_model.pre_process_generation( + images=dog_image_torch, prompt=prompt + ) + + # Process list of numpy images + list_numpy_output = qwen_model.pre_process_generation( + images=[dog_image_numpy, dog_image_numpy], prompt=prompt + ) + + # Process list of torch tensors + list_tensor_output = qwen_model.pre_process_generation( + images=[dog_image_torch, dog_image_torch], prompt=prompt + ) + + # Process batched tensor + batched_tensor = torch.stack([dog_image_torch, dog_image_torch]) + batched_tensor_output = qwen_model.pre_process_generation( + images=batched_tensor, prompt=prompt + ) + + return ( + numpy_output, + tensor_output, + list_numpy_output, + list_tensor_output, + batched_tensor_output, + ) + + +@pytest.mark.slow +def test_preprocessed_output_shapes( + qwen_model: Qwen25VLHF, + dog_image_numpy: np.ndarray, + dog_image_torch: torch.Tensor, +): + # GIVEN + ( + numpy_output, + tensor_output, + list_numpy_output, + list_tensor_output, + batched_tensor_output, + ) = get_preprocessed_outputs(qwen_model, dog_image_numpy, dog_image_torch) + + # THEN + # Check shapes for single image inputs + assert ( + "image_grid_thw" in numpy_output + and numpy_output["image_grid_thw"].shape[0] == 1 + ) + assert ( + "image_grid_thw" in tensor_output + and tensor_output["image_grid_thw"].shape[0] == 1 + ) + + # Check shapes for multi-image inputs + assert ( + "image_grid_thw" in list_numpy_output + and list_numpy_output["image_grid_thw"].shape[0] == 2 + ) + assert ( + "image_grid_thw" in list_tensor_output + and list_tensor_output["image_grid_thw"].shape[0] == 2 + ) + assert ( + "image_grid_thw" in batched_tensor_output + and batched_tensor_output["image_grid_thw"].shape[0] == 2 + ) + + +@pytest.mark.slow +def test_internal_consistency_of_preprocessed_inputs( + qwen_model: Qwen25VLHF, + dog_image_numpy: np.ndarray, + dog_image_torch: torch.Tensor, +): + # GIVEN + ( + numpy_output, + tensor_output, + list_numpy_output, + list_tensor_output, + batched_tensor_output, + ) = get_preprocessed_outputs(qwen_model, dog_image_numpy, dog_image_torch) + # The dog_image_numpy is BGR, dog_image_torch is RGB. + # The processor should handle the conversion, but let's compare RGB numpy to RGB tensor + prompt = "What is in the image?" 
+ rgb_dog_image_numpy = dog_image_numpy[:, :, ::-1] + numpy_rgb_output = qwen_model.pre_process_generation( + images=rgb_dog_image_numpy, prompt=prompt, input_color_format="rgb" + ) + + # THEN + # Compare single numpy (RGB) and single tensor (RGB) + assert torch.allclose( + numpy_rgb_output["pixel_values"], tensor_output["pixel_values"], atol=1e-2 + ) + assert torch.allclose( + numpy_rgb_output["input_ids"], tensor_output["input_ids"], atol=1e-2 + ) + + # Compare list of tensors and batched tensor + assert torch.allclose( + list_tensor_output["pixel_values"], + batched_tensor_output["pixel_values"], + atol=1e-2, + ) + assert torch.allclose( + list_tensor_output["input_ids"], + batched_tensor_output["input_ids"], + atol=1e-2, + ) diff --git a/inference_experimental/tests/integration_tests/models/test_smolvlm_predictions.py b/inference_experimental/tests/integration_tests/models/test_smolvlm_predictions.py new file mode 100644 index 0000000000..5d88cad4ec --- /dev/null +++ b/inference_experimental/tests/integration_tests/models/test_smolvlm_predictions.py @@ -0,0 +1,19 @@ +import numpy as np +import pytest + +from inference_exp.models.smolvlm.smolvlm_hf import SmolVLMHF + + +@pytest.fixture(scope="module") +def smolvlm_model(smolvlm_256m_path: str) -> SmolVLMHF: + return SmolVLMHF.from_pretrained(smolvlm_256m_path) + + +@pytest.mark.slow +def test_prompt(smolvlm_model: SmolVLMHF, dog_image_numpy: np.ndarray): + # when + result = smolvlm_model.prompt( + images=dog_image_numpy, prompt="What is in the image?" + ) + # then + assert result == ["There is a person and a dog in the image."] diff --git a/inference_experimental/tests/integration_tests/models/test_smolvlm_preprocessing.py b/inference_experimental/tests/integration_tests/models/test_smolvlm_preprocessing.py new file mode 100644 index 0000000000..8a58050259 --- /dev/null +++ b/inference_experimental/tests/integration_tests/models/test_smolvlm_preprocessing.py @@ -0,0 +1,136 @@ +import numpy as np +import pytest +import torch + +from inference_exp.models.smolvlm.smolvlm_hf import SmolVLMHF + + +@pytest.fixture(scope="module") +def smolvlm_model(smolvlm_256m_path: str) -> SmolVLMHF: + return SmolVLMHF.from_pretrained(smolvlm_256m_path) + + +def get_preprocessed_outputs( + smolvlm_model: SmolVLMHF, + dog_image_numpy: np.ndarray, + dog_image_torch: torch.Tensor, +): + prompt = "What is in the image?" 
+ # Process single numpy image (BGR) + numpy_output = smolvlm_model.pre_process_generation( + images=dog_image_numpy, prompt=prompt + ) + + # Process single torch tensor (RGB) + tensor_output = smolvlm_model.pre_process_generation( + images=dog_image_torch, prompt=prompt + ) + + # Process list of numpy images + list_numpy_output = smolvlm_model.pre_process_generation( + images=[dog_image_numpy, dog_image_numpy], + prompt=prompt, + images_to_single_prompt=False, + ) + + # Process list of torch tensors + list_tensor_output = smolvlm_model.pre_process_generation( + images=[dog_image_torch, dog_image_torch], + prompt=prompt, + images_to_single_prompt=False, + ) + + # Process batched tensor + batched_tensor = torch.stack([dog_image_torch, dog_image_torch]) + batched_tensor_output = smolvlm_model.pre_process_generation( + images=batched_tensor, prompt=prompt, images_to_single_prompt=False + ) + + return ( + numpy_output, + tensor_output, + list_numpy_output, + list_tensor_output, + batched_tensor_output, + ) + + +@pytest.mark.slow +def test_preprocessed_output_shapes( + smolvlm_model: SmolVLMHF, + dog_image_numpy: np.ndarray, + dog_image_torch: torch.Tensor, +): + # GIVEN + ( + numpy_output, + tensor_output, + list_numpy_output, + list_tensor_output, + batched_tensor_output, + ) = get_preprocessed_outputs(smolvlm_model, dog_image_numpy, dog_image_torch) + + # THEN + # Check shapes for single image inputs + assert "pixel_values" in numpy_output and numpy_output["pixel_values"].shape[0] == 1 + assert ( + "pixel_values" in tensor_output and tensor_output["pixel_values"].shape[0] == 1 + ) + + # Check shapes for multi-image inputs + assert ( + "pixel_values" in list_numpy_output + and list_numpy_output["pixel_values"].shape[0] == 2 + ) + assert ( + "pixel_values" in list_tensor_output + and list_tensor_output["pixel_values"].shape[0] == 2 + ) + assert ( + "pixel_values" in batched_tensor_output + and batched_tensor_output["pixel_values"].shape[0] == 2 + ) + + +@pytest.mark.slow +def test_internal_consistency_of_preprocessed_inputs( + smolvlm_model: SmolVLMHF, + dog_image_numpy: np.ndarray, + dog_image_torch: torch.Tensor, +): + # GIVEN + ( + numpy_output, + tensor_output, + list_numpy_output, + list_tensor_output, + batched_tensor_output, + ) = get_preprocessed_outputs(smolvlm_model, dog_image_numpy, dog_image_torch) + # The dog_image_numpy is BGR, dog_image_torch is RGB. + # The processor should handle the conversion, but let's compare RGB numpy to RGB tensor + prompt = "What is in the image?" 
+ rgb_dog_image_numpy = dog_image_numpy[:, :, ::-1] + numpy_rgb_output = smolvlm_model.pre_process_generation( + images=rgb_dog_image_numpy, prompt=prompt, input_color_format="rgb" + ) + + # THEN + # Compare single numpy (RGB) and single tensor (RGB) + assert torch.allclose( + numpy_rgb_output["pixel_values"], tensor_output["pixel_values"], atol=1e-2 + ) + assert torch.allclose( + numpy_rgb_output["input_ids"], tensor_output["input_ids"], atol=1e-2 + ) + + # Compare list of tensors and batched tensor + assert torch.allclose( + list_tensor_output["pixel_values"], + batched_tensor_output["pixel_values"], + atol=1e-2, + ) + assert torch.allclose( + list_tensor_output["input_ids"], + batched_tensor_output["input_ids"], + atol=1e-2, + ) diff --git a/inference_experimental/uv.lock b/inference_experimental/uv.lock index dd3f021cdb..094eebb80e 100644 --- a/inference_experimental/uv.lock +++ b/inference_experimental/uv.lock @@ -531,7 +531,7 @@ wheels = [ [[package]] name = "inference-exp" -version = "0.13.0" +version = "0.14.0" source = { virtual = "." } dependencies = [ { name = "accelerate" }, diff --git a/mkdocs.yml b/mkdocs.yml index 73e77d6a1a..5e6381a7bb 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -234,6 +234,7 @@ plugins: implicit_index: True - macros: include_dir: docs/include + module_name: docs/scripts/macros markdown_extensions: diff --git a/requirements/requirements.waf.txt b/requirements/requirements.waf.txt deleted file mode 100644 index d5b5e0631a..0000000000 --- a/requirements/requirements.waf.txt +++ /dev/null @@ -1 +0,0 @@ -metlo>=0.0.17,<=0.1.5 \ No newline at end of file diff --git a/setup.py b/setup.py index fd7c31eec0..7683c7a3ee 100644 --- a/setup.py +++ b/setup.py @@ -56,7 +56,6 @@ def read_requirements(path): "requirements/requirements.gaze.txt", "requirements/requirements.groundingdino.txt", "requirements/requirements.hosted.txt", - "requirements/requirements.waf.txt", "requirements/requirements.yolo_world.txt", "requirements/requirements.code_analysis.txt", "requirements/requirements.test.unit.txt", diff --git a/tests/inference/unit_tests/usage_tracking/conftest.py b/tests/inference/unit_tests/usage_tracking/conftest.py new file mode 100644 index 0000000000..6e55576f72 --- /dev/null +++ b/tests/inference/unit_tests/usage_tracking/conftest.py @@ -0,0 +1,39 @@ +import importlib +from unittest.mock import MagicMock + +import pytest + + +@pytest.fixture +def usage_collector_with_mocked_threads(): + """ + Fixture that provides a UsageCollector instance with mocked threads. + This prevents the actual threads from starting during tests. 
+ """ + import threading + original_thread = threading.Thread + original_event = threading.Event + + try: + threading.Thread = MagicMock() + threading.Event = MagicMock() + + from inference.usage_tracking import collector as collector_module + importlib.reload(collector_module) + + usage_collector = collector_module.usage_collector + threading.Thread = original_thread + threading.Event = original_event + + usage_collector._usage.clear() + if hasattr(usage_collector, "_hashed_api_keys"): + usage_collector._hashed_api_keys.clear() + if hasattr(usage_collector, "_resource_details"): + usage_collector._resource_details.clear() + + yield usage_collector + + finally: + threading.Thread = original_thread + threading.Event = original_event + importlib.reload(collector_module) diff --git a/tests/inference/unit_tests/usage_tracking/test_collector.py b/tests/inference/unit_tests/usage_tracking/test_collector.py index 0e08e6fa32..b4c3712be6 100644 --- a/tests/inference/unit_tests/usage_tracking/test_collector.py +++ b/tests/inference/unit_tests/usage_tracking/test_collector.py @@ -3,10 +3,10 @@ import sys import pytest +from unittest import mock from inference.core.env import LAMBDA from inference.core.version import __version__ as inference_version -from inference.usage_tracking.collector import UsageCollector from inference.usage_tracking.payload_helpers import ( get_api_key_usage_containing_resource, merge_usage_dicts, @@ -15,9 +15,9 @@ ) -def test_create_empty_usage_dict(): +def test_create_empty_usage_dict(usage_collector_with_mocked_threads): # given - usage_default_dict = UsageCollector.empty_usage_dict( + usage_default_dict = usage_collector_with_mocked_threads.empty_usage_dict( exec_session_id="exec_session_id" ) @@ -877,9 +877,9 @@ def test_zip_usage_payloads_with_different_exec_session_ids(): ] -def test_system_info_with_dedicated_deployment_id(): +def test_system_info_with_dedicated_deployment_id(usage_collector_with_mocked_threads): # given - system_info = UsageCollector.system_info( + system_info = usage_collector_with_mocked_threads.system_info( ip_address="w.x.y.z", hostname="hostname01", dedicated_deployment_id="deployment01", @@ -895,9 +895,9 @@ def test_system_info_with_dedicated_deployment_id(): assert system_info[k] == v -def test_system_info_with_no_dedicated_deployment_id(): +def test_system_info_with_no_dedicated_deployment_id(usage_collector_with_mocked_threads): # given - system_info = UsageCollector.system_info( + system_info = usage_collector_with_mocked_threads.system_info( ip_address="w.x.y.z", hostname="hostname01" ) @@ -911,9 +911,9 @@ def test_system_info_with_no_dedicated_deployment_id(): assert system_info[k] == v -def test_record_malformed_usage(): +def test_record_malformed_usage(usage_collector_with_mocked_threads): # given - collector = UsageCollector() + collector = usage_collector_with_mocked_threads # when collector.record_usage( @@ -938,3 +938,39 @@ def test_record_malformed_usage(): assert collector._usage[api_key]["model:None"]["resource_id"] == None assert collector._usage[api_key]["model:None"]["resource_details"] == "{}" assert collector._usage[api_key]["model:None"]["api_key_hash"] == api_key + + +def test_record_usage_with_exception(usage_collector_with_mocked_threads): + # given + usage_collector = usage_collector_with_mocked_threads + + @usage_collector(category="model") + def test_func(api_key="test_key"): + raise Exception("test exception") + + # when + with pytest.raises(Exception, match="test exception"): + test_func() + + # then + assert 
len(usage_collector._usage) == 0 + + +def test_record_usage_with_exception_on_GCP(usage_collector_with_mocked_threads): + # given + usage_collector = usage_collector_with_mocked_threads + + @usage_collector(category="model") + def test_func(api_key="test_key"): + raise Exception("test exception") + + # when + with mock.patch("inference.usage_tracking.collector.GCP_SERVERLESS", True): + with pytest.raises(Exception, match="test exception"): + test_func() + + # then + assert len(usage_collector._usage) == 1 + assert "test_key" in usage_collector._usage + assert "model:unknown" in usage_collector._usage["test_key"] + assert json.loads(usage_collector._usage["test_key"]["model:unknown"]["resource_details"]).get("error") == "test exception"
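
Behaviour pinned down by the two tests added at the end of test_collector.py: when a callable decorated by the usage collector raises, nothing is recorded outside of GCP serverless, while with GCP_SERVERLESS patched to True the failure is recorded under "<category>:unknown" for the caller's api_key, with the exception message serialized into resource_details. The sketch below is an illustration of that asserted contract only; MiniUsageCollector and the inspect-based api_key lookup are editor assumptions and do not reflect the internals of inference.usage_tracking.collector.

    import functools
    import inspect
    import json
    from collections import defaultdict

    GCP_SERVERLESS = True  # assumption: stand-in for the real environment flag


    class MiniUsageCollector:
        """Illustrates the behaviour the new exception tests assert, not the real collector."""

        def __init__(self):
            self._usage = defaultdict(dict)

        def __call__(self, category: str):
            def decorator(func):
                @functools.wraps(func)
                def wrapper(*args, **kwargs):
                    try:
                        return func(*args, **kwargs)
                    except Exception as exc:
                        if GCP_SERVERLESS:
                            # Resolve api_key from the call arguments or the function's defaults,
                            # as in the tests where test_func(api_key="test_key") is called bare.
                            bound = inspect.signature(func).bind_partial(*args, **kwargs)
                            bound.apply_defaults()
                            api_key = bound.arguments.get("api_key")
                            self._usage[api_key][f"{category}:unknown"] = {
                                "resource_details": json.dumps({"error": str(exc)})
                            }
                        raise

                return wrapper

            return decorator

With GCP_SERVERLESS set to False, the except branch records nothing and _usage stays empty, matching the first of the two tests.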
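
Note on the new model fixtures: qwen25vl_3b_path, paligemma_3b_224_path, smolvlm_256m_path and moondream2_path in inference_experimental/tests/integration_tests/models/conftest.py all repeat the same steps — download the zipped weights once, then take a FileLock so that only one test worker extracts the archive into the package directory. The sketch below shows that shared pattern as a single helper under stated assumptions: the name fetch_model_package and the urllib-based download are illustrative only, since the patch keeps per-model fixtures and its existing _download_if_not_exists helper.

    import os
    import urllib.request
    import zipfile

    from filelock import FileLock


    def fetch_model_package(models_dir: str, package_name: str, url: str, extracted_dir: str = "weights") -> str:
        # Layout mirrors the fixtures above:
        # <models_dir>/<package_name>/<package_name>.zip unpacked into <models_dir>/<package_name>/<extracted_dir>
        package_dir = os.path.join(models_dir, package_name)
        unzipped_package_path = os.path.join(package_dir, extracted_dir)
        os.makedirs(package_dir, exist_ok=True)
        zip_path = os.path.join(package_dir, f"{package_name}.zip")
        if not os.path.exists(zip_path):
            # Assumption: the test-assets bucket is reachable over plain HTTPS.
            urllib.request.urlretrieve(url, zip_path)
        # Serialize extraction across workers sharing the same cache directory.
        with FileLock(f"{unzipped_package_path}.lock", timeout=120):
            if not os.path.exists(unzipped_package_path):
                with zipfile.ZipFile(zip_path, "r") as zip_ref:
                    zip_ref.extractall(package_dir)
        return unzipped_package_path

Under those assumptions, qwen25vl_3b_path would reduce to return fetch_model_package(MODELS_DIR, "qwen25vl-3b", QWEN25VL_3B_FT_URL); the other three fixtures differ only in package name, URL and, for moondream2, the extracted directory name ("moondream2-2b").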