diff --git a/comfy_api_nodes/README.md b/comfy_api_nodes/README.md index e2633a7692c..64a389cc1e1 100644 --- a/comfy_api_nodes/README.md +++ b/comfy_api_nodes/README.md @@ -18,6 +18,8 @@ Follow the instructions [here](https://github.com/Comfy-Org/ComfyUI_frontend) to python run main.py --comfy-api-base https://stagingapi.comfy.org ``` +To authenticate to staging, please log in and then ask a member of the Comfy Org team to whitelist you for access to staging. + API stubs are generated through automatic codegen tools from OpenAPI definitions. Since the Comfy Org OpenAPI definition contains many things from the Comfy Registry as well, we use redocly/cli to filter out only the paths relevant for API nodes. ### Redocly Instructions @@ -28,7 +30,7 @@ When developing locally, use the `redocly-dev.yaml` file to generate pydantic mo Before your API node PR merges, make sure to add the `Released` tag to the `openapi.yaml` file and test in staging. ```bash -# Download the OpenAPI file from prod server. +# Download the OpenAPI file from staging server. curl -o openapi.yaml https://stagingapi.comfy.org/openapi # Filter out unneeded API definitions. @@ -39,3 +41,25 @@ redocly bundle openapi.yaml --output filtered-openapi.yaml --config comfy_api_no datamodel-codegen --use-subclass-enum --field-constraints --strict-types bytes --input filtered-openapi.yaml --output comfy_api_nodes/apis/__init__.py --output-model-type pydantic_v2.BaseModel ``` + + +# Merging to Master + +Before merging to comfyanonymous/ComfyUI master, follow these steps: + +1. Add the "Released" tag to the ComfyUI OpenAPI yaml file for each endpoint you are using in the nodes. +1. Make sure the ComfyUI API is deployed to prod with your changes. +1. Run the code generation again with `redocly.yaml` and the production OpenAPI yaml file. + +```bash +# Download the OpenAPI file from prod server. +curl -o openapi.yaml https://api.comfy.org/openapi + +# Filter out unneeded API definitions. 
+npm install -g @redocly/cli +redocly bundle openapi.yaml --output filtered-openapi.yaml --config comfy_api_nodes/redocly.yaml --remove-unused-components + +# Generate the pydantic datamodels for validation. +datamodel-codegen --use-subclass-enum --field-constraints --strict-types bytes --input filtered-openapi.yaml --output comfy_api_nodes/apis/__init__.py --output-model-type pydantic_v2.BaseModel + +``` diff --git a/comfy_api_nodes/apinode_utils.py b/comfy_api_nodes/apinode_utils.py index 87d8c3e1d41..788e2803f5d 100644 --- a/comfy_api_nodes/apinode_utils.py +++ b/comfy_api_nodes/apinode_utils.py @@ -1,6 +1,7 @@ from __future__ import annotations import io import logging +import mimetypes from typing import Optional, Union from comfy.utils import common_upscale from comfy_api.input_impl import VideoFromFile @@ -214,6 +215,7 @@ def download_url_to_image_tensor(url: str, timeout: int = None) -> torch.Tensor: image_bytesio = download_url_to_bytesio(url, timeout) return bytesio_to_image_tensor(image_bytesio) + def process_image_response(response: requests.Response) -> torch.Tensor: """Uses content from a Response object and converts it to a torch.Tensor""" return bytesio_to_image_tensor(BytesIO(response.content)) @@ -318,11 +320,27 @@ def tensor_to_data_uri( return f"data:{mime_type};base64,{base64_string}" +def text_filepath_to_base64_string(filepath: str) -> str: + """Converts a text file to a base64 string.""" + with open(filepath, "rb") as f: + file_content = f.read() + return base64.b64encode(file_content).decode("utf-8") + + +def text_filepath_to_data_uri(filepath: str) -> str: + """Converts a text file to a data URI.""" + base64_string = text_filepath_to_base64_string(filepath) + mime_type, _ = mimetypes.guess_type(filepath) + if mime_type is None: + mime_type = "application/octet-stream" + return f"data:{mime_type};base64,{base64_string}" + + def upload_file_to_comfyapi( file_bytes_io: BytesIO, filename: str, upload_mime_type: str, - auth_kwargs: 
Optional[dict[str,str]] = None, + auth_kwargs: Optional[dict[str, str]] = None, ) -> str: """ Uploads a single file to ComfyUI API and returns its download URL. @@ -357,9 +375,33 @@ def upload_file_to_comfyapi( return response.download_url +def video_to_base64_string( + video: VideoInput, + container_format: VideoContainer = None, + codec: VideoCodec = None +) -> str: + """ + Converts a video input to a base64 string. + + Args: + video: The video input to convert + container_format: Optional container format to use (defaults to video.container if available) + codec: Optional codec to use (defaults to video.codec if available) + """ + video_bytes_io = io.BytesIO() + + # Use provided format/codec if specified, otherwise use video's own if available + format_to_use = container_format if container_format is not None else getattr(video, 'container', VideoContainer.MP4) + codec_to_use = codec if codec is not None else getattr(video, 'codec', VideoCodec.H264) + + video.save_to(video_bytes_io, format=format_to_use, codec=codec_to_use) + video_bytes_io.seek(0) + return base64.b64encode(video_bytes_io.getvalue()).decode("utf-8") + + def upload_video_to_comfyapi( video: VideoInput, - auth_kwargs: Optional[dict[str,str]] = None, + auth_kwargs: Optional[dict[str, str]] = None, container: VideoContainer = VideoContainer.MP4, codec: VideoCodec = VideoCodec.H264, max_duration: Optional[int] = None, @@ -461,7 +503,7 @@ def audio_ndarray_to_bytesio( def upload_audio_to_comfyapi( audio: AudioInput, - auth_kwargs: Optional[dict[str,str]] = None, + auth_kwargs: Optional[dict[str, str]] = None, container_format: str = "mp4", codec_name: str = "aac", mime_type: str = "audio/mp4", @@ -488,8 +530,25 @@ def upload_audio_to_comfyapi( return upload_file_to_comfyapi(audio_bytes_io, filename, mime_type, auth_kwargs) +def audio_to_base64_string( + audio: AudioInput, container_format: str = "mp4", codec_name: str = "aac" +) -> str: + """Converts an audio input to a base64 string.""" + 
sample_rate: int = audio["sample_rate"] + waveform: torch.Tensor = audio["waveform"] + audio_data_np = audio_tensor_to_contiguous_ndarray(waveform) + audio_bytes_io = audio_ndarray_to_bytesio( + audio_data_np, sample_rate, container_format, codec_name + ) + audio_bytes = audio_bytes_io.getvalue() + return base64.b64encode(audio_bytes).decode("utf-8") + + def upload_images_to_comfyapi( - image: torch.Tensor, max_images=8, auth_kwargs: Optional[dict[str,str]] = None, mime_type: Optional[str] = None + image: torch.Tensor, + max_images=8, + auth_kwargs: Optional[dict[str, str]] = None, + mime_type: Optional[str] = None, ) -> list[str]: """ Uploads images to ComfyUI API and returns download URLs. @@ -554,17 +613,24 @@ def upload_images_to_comfyapi( return download_urls -def resize_mask_to_image(mask: torch.Tensor, image: torch.Tensor, - upscale_method="nearest-exact", crop="disabled", - allow_gradient=True, add_channel_dim=False): +def resize_mask_to_image( + mask: torch.Tensor, + image: torch.Tensor, + upscale_method="nearest-exact", + crop="disabled", + allow_gradient=True, + add_channel_dim=False, +): """ Resize mask to be the same dimensions as an image, while maintaining proper format for API calls. 
""" _, H, W, _ = image.shape mask = mask.unsqueeze(-1) - mask = mask.movedim(-1,1) - mask = common_upscale(mask, width=W, height=H, upscale_method=upscale_method, crop=crop) - mask = mask.movedim(1,-1) + mask = mask.movedim(-1, 1) + mask = common_upscale( + mask, width=W, height=H, upscale_method=upscale_method, crop=crop + ) + mask = mask.movedim(1, -1) if not add_channel_dim: mask = mask.squeeze(-1) if not allow_gradient: @@ -572,12 +638,41 @@ def resize_mask_to_image(mask: torch.Tensor, image: torch.Tensor, return mask -def validate_string(string: str, strip_whitespace=True, field_name="prompt", min_length=None, max_length=None): +def validate_string( + string: str, + strip_whitespace=True, + field_name="prompt", + min_length=None, + max_length=None, +): + if string is None: + raise Exception(f"Field '{field_name}' cannot be empty.") if strip_whitespace: string = string.strip() if min_length and len(string) < min_length: - raise Exception(f"Field '{field_name}' cannot be shorter than {min_length} characters; was {len(string)} characters long.") + raise Exception( + f"Field '{field_name}' cannot be shorter than {min_length} characters; was {len(string)} characters long." + ) if max_length and len(string) > max_length: - raise Exception(f" Field '{field_name} cannot be longer than {max_length} characters; was {len(string)} characters long.") - if not string: - raise Exception(f"Field '{field_name}' cannot be empty.") + raise Exception( + f" Field '{field_name} cannot be longer than {max_length} characters; was {len(string)} characters long." + ) + + +def image_tensor_pair_to_batch( + image1: torch.Tensor, image2: torch.Tensor +) -> torch.Tensor: + """ + Converts a pair of image tensors to a batch tensor. + If the images are not the same size, the smaller image is resized to + match the larger image. 
+ """ + if image1.shape[1:] != image2.shape[1:]: + image2 = common_upscale( + image2.movedim(-1, 1), + image1.shape[2], + image1.shape[1], + "bilinear", + "center", + ).movedim(1, -1) + return torch.cat((image1, image2), dim=0) diff --git a/comfy_api_nodes/apis/__init__.py b/comfy_api_nodes/apis/__init__.py index aa1c4ce0b35..e38d38cc908 100644 --- a/comfy_api_nodes/apis/__init__.py +++ b/comfy_api_nodes/apis/__init__.py @@ -1,236 +1,445 @@ # generated by datamodel-codegen: # filename: filtered-openapi.yaml -# timestamp: 2025-05-04T04:12:39+00:00 +# timestamp: 2025-05-19T21:38:55+00:00 from __future__ import annotations -from datetime import datetime +from datetime import date, datetime from enum import Enum from typing import Any, Dict, List, Literal, Optional, Union from uuid import UUID -from pydantic import AnyUrl, BaseModel, Field, RootModel, StrictBytes +from pydantic import AnyUrl, BaseModel, ConfigDict, Field, RootModel, StrictBytes -class PersonalAccessToken(BaseModel): - id: Optional[UUID] = Field(None, description='Unique identifier for the GitCommit') - name: Optional[str] = Field( - None, - description='Required. The name of the token. Can be a simple description.', - ) - description: Optional[str] = Field( - None, - description="Optional. A more detailed description of the token's intended use.", +class APIKey(BaseModel): + created_at: Optional[datetime] = None + description: Optional[str] = None + id: Optional[str] = None + key_prefix: Optional[str] = None + name: Optional[str] = None + + +class APIKeyWithPlaintext(APIKey): + plaintext_key: Optional[str] = Field( + None, description='The full API key (only returned at creation)' ) + + +class AuditLog(BaseModel): createdAt: Optional[datetime] = Field( - None, description='[Output Only]The date and time the token was created.' + None, description='The date and time the event was created' ) - token: Optional[str] = Field( - None, - description='[Output Only]. The personal access token. 
Only returned during creation.', + event_id: Optional[str] = Field(None, description='the id of the event') + event_type: Optional[str] = Field(None, description='the type of the event') + params: Optional[Dict[str, Any]] = Field( + None, description='data related to the event' ) -class GitCommitSummary(BaseModel): - commit_hash: Optional[str] = Field(None, description='The hash of the commit') - commit_name: Optional[str] = Field(None, description='The name of the commit') - branch_name: Optional[str] = Field( - None, description='The branch where the commit was made' +class OutputFormat(str, Enum): + jpeg = 'jpeg' + png = 'png' + + +class BFLFluxPro11GenerateRequest(BaseModel): + height: int = Field(..., description='Height of the generated image') + image_prompt: Optional[str] = Field(None, description='Optional image prompt') + output_format: Optional[OutputFormat] = Field( + None, description='Output image format' + ) + prompt: str = Field(..., description='The main text prompt for image generation') + prompt_upsampling: Optional[bool] = Field( + None, description='Whether to use prompt upsampling' ) - author: Optional[str] = Field(None, description='The author of the commit') - timestamp: Optional[datetime] = Field( - None, description='The timestamp when the commit was made' + safety_tolerance: Optional[int] = Field(None, description='Safety tolerance level') + seed: Optional[int] = Field(None, description='Random seed for reproducibility') + webhook_secret: Optional[str] = Field( + None, description='Optional webhook secret for async processing' ) - status_summary: Optional[Dict[str, str]] = Field( - None, description='A map of operating system to status pairs' + webhook_url: Optional[str] = Field( + None, description='Optional webhook URL for async processing' ) + width: int = Field(..., description='Width of the generated image') + + +class BFLFluxPro11GenerateResponse(BaseModel): + id: str = Field(..., description='Job ID for tracking') + polling_url: 
str = Field(..., description='URL to poll for results') -class User(BaseModel): - id: Optional[str] = Field(None, description='The unique id for this user.') - email: Optional[str] = Field(None, description='The email address for this user.') - name: Optional[str] = Field(None, description='The name for this user.') - isApproved: Optional[bool] = Field( - None, description='Indicates if the user is approved.' +class BFLFluxProGenerateRequest(BaseModel): + guidance_scale: Optional[float] = Field( + None, description='The guidance scale for generation.', ge=1.0, le=20.0 + ) + height: int = Field( + ..., description='The height of the image to generate.', ge=64, le=2048 + ) + negative_prompt: Optional[str] = Field( + None, description='The negative prompt for image generation.' + ) + num_images: Optional[int] = Field( + None, description='The number of images to generate.', ge=1, le=4 + ) + num_inference_steps: Optional[int] = Field( + None, description='The number of inference steps.', ge=1, le=100 ) - isAdmin: Optional[bool] = Field( - None, description='Indicates if the user has admin privileges.' 
+ prompt: str = Field(..., description='The text prompt for image generation.') + seed: Optional[int] = Field(None, description='The seed value for reproducibility.') + width: int = Field( + ..., description='The width of the image to generate.', ge=64, le=2048 ) -class PublisherUser(BaseModel): - id: Optional[str] = Field(None, description='The unique id for this user.') - email: Optional[str] = Field(None, description='The email address for this user.') - name: Optional[str] = Field(None, description='The name for this user.') +class BFLFluxProGenerateResponse(BaseModel): + id: str = Field(..., description='The unique identifier for the generation task.') + polling_url: str = Field(..., description='URL to poll for the generation result.') -class ErrorResponse(BaseModel): - error: str - message: str +class Status(str, Enum): + in_progress = 'in_progress' + completed = 'completed' + incomplete = 'incomplete' -class StorageFile(BaseModel): - id: Optional[UUID] = Field( - None, description='Unique identifier for the storage file' - ) - file_path: Optional[str] = Field(None, description='Path to the file in storage') - public_url: Optional[str] = Field(None, description='Public URL') +class Type(str, Enum): + computer_call = 'computer_call' -class PublisherMember(BaseModel): - id: Optional[str] = Field( - None, description='The unique identifier for the publisher member.' +class ComputerToolCall(BaseModel): + action: Dict[str, Any] + call_id: str = Field( + ..., + description='An identifier used when responding to the tool call with output.\n', ) - user: Optional[PublisherUser] = Field( - None, description='The user associated with this publisher member.' + id: str = Field(..., description='The unique ID of the computer call.') + status: Status = Field( + ..., + description='The status of the item. One of `in_progress`, `completed`, or\n`incomplete`. 
Populated when items are returned via API.\n', ) - role: Optional[str] = Field( - None, description='The role of the user in the publisher.' + type: Type = Field( + ..., description='The type of the computer call. Always `computer_call`.' ) -class ComfyNode(BaseModel): - comfy_node_name: Optional[str] = Field( - None, description='Unique identifier for the node' +class Environment(str, Enum): + windows = 'windows' + mac = 'mac' + linux = 'linux' + ubuntu = 'ubuntu' + browser = 'browser' + + +class Type1(str, Enum): + computer_use_preview = 'computer_use_preview' + + +class ComputerUsePreviewTool(BaseModel): + display_height: int = Field(..., description='The height of the computer display.') + display_width: int = Field(..., description='The width of the computer display.') + environment: Environment = Field( + ..., description='The type of computer environment to control.' ) - category: Optional[str] = Field( - None, - description='UI category where the node is listed, used for grouping nodes.', + type: Literal['ComputerUsePreviewTool'] = Field( + ..., + description='The type of the computer use tool. Always `computer_use_preview`.', ) - description: Optional[str] = Field( - None, description="Brief description of the node's functionality or purpose." + + +class CreateAPIKeyRequest(BaseModel): + description: Optional[str] = None + name: str + + +class Customer(BaseModel): + createdAt: Optional[datetime] = Field( + None, description='The date and time the user was created' ) - input_types: Optional[str] = Field(None, description='Defines input parameters') - deprecated: Optional[bool] = Field( - None, - description='Indicates if the node is deprecated. 
Deprecated nodes are hidden in the UI.', + email: Optional[str] = Field(None, description='The email address for this user') + id: str = Field(..., description='The firebase UID of the user') + is_admin: Optional[bool] = Field(None, description='Whether the user is an admin') + metronome_id: Optional[str] = Field(None, description='The Metronome customer ID') + name: Optional[str] = Field(None, description='The name for this user') + stripe_id: Optional[str] = Field(None, description='The Stripe customer ID') + updatedAt: Optional[datetime] = Field( + None, description='The date and time the user was last updated' ) - experimental: Optional[bool] = Field( + + +class CustomerStorageResourceResponse(BaseModel): + download_url: Optional[str] = Field( None, - description='Indicates if the node is experimental, subject to changes or removal.', - ) - output_is_list: Optional[List[bool]] = Field( - None, description='Boolean values indicating if each output is a list.' + description='The signed URL to use for downloading the file from the specified path', ) - return_names: Optional[str] = Field( - None, description='Names of the outputs for clarity in workflows.' + existing_file: Optional[bool] = Field( + None, description='Whether an existing file with the same hash was found' ) - return_types: Optional[str] = Field( - None, description='Specifies the types of outputs produced by the node.' + expires_at: Optional[datetime] = Field( + None, description='When the signed URL will expire' ) - function: Optional[str] = Field( - None, description='Name of the entry-point function to execute the node.' 
+ upload_url: Optional[str] = Field( + None, + description='The signed URL to use for uploading the file to the specified path', ) -class ComfyNodeCloudBuildInfo(BaseModel): - project_id: Optional[str] = None - project_number: Optional[str] = None - location: Optional[str] = None - build_id: Optional[str] = None +class Role(str, Enum): + user = 'user' + assistant = 'assistant' + system = 'system' + developer = 'developer' -class Error(BaseModel): - message: Optional[str] = Field( - None, description='A clear and concise description of the error.' +class Type2(str, Enum): + message = 'message' + + +class ErrorResponse(BaseModel): + error: str + message: str + + +class Type3(str, Enum): + file_search = 'file_search' + + +class FileSearchTool(BaseModel): + type: Literal['FileSearchTool'] = Field(..., description='The type of tool') + vector_store_ids: List[str] = Field( + ..., description='IDs of vector stores to search in' ) - details: Optional[List[str]] = Field( - None, - description='Optional detailed information about the error or hints for resolving it.', + + +class Result(BaseModel): + file_id: Optional[str] = Field(None, description='The unique ID of the file.\n') + filename: Optional[str] = Field(None, description='The name of the file.\n') + score: Optional[float] = Field( + None, description='The relevance score of the file - a value between 0 and 1.\n' + ) + text: Optional[str] = Field( + None, description='The text that was retrieved from the file.\n' ) -class NodeVersionUpdateRequest(BaseModel): - changelog: Optional[str] = Field( - None, description='The changelog describing the version changes.' 
+class Status1(str, Enum): + in_progress = 'in_progress' + searching = 'searching' + completed = 'completed' + incomplete = 'incomplete' + failed = 'failed' + + +class Type4(str, Enum): + file_search_call = 'file_search_call' + + +class FileSearchToolCall(BaseModel): + id: str = Field(..., description='The unique ID of the file search tool call.\n') + queries: List[str] = Field( + ..., description='The queries used to search for files.\n' ) - deprecated: Optional[bool] = Field( - None, description='Whether the version is deprecated.' + results: Optional[List[Result]] = Field( + None, description='The results of the file search tool call.\n' + ) + status: Status1 = Field( + ..., + description='The status of the file search tool call. One of `in_progress`, \n`searching`, `incomplete` or `failed`,\n', + ) + type: Type4 = Field( + ..., + description='The type of the file search tool call. Always `file_search_call`.\n', ) -class NodeStatus(str, Enum): - NodeStatusActive = 'NodeStatusActive' - NodeStatusDeleted = 'NodeStatusDeleted' - NodeStatusBanned = 'NodeStatusBanned' +class Type5(str, Enum): + function = 'function' -class NodeVersionStatus(str, Enum): - NodeVersionStatusActive = 'NodeVersionStatusActive' - NodeVersionStatusDeleted = 'NodeVersionStatusDeleted' - NodeVersionStatusBanned = 'NodeVersionStatusBanned' - NodeVersionStatusPending = 'NodeVersionStatusPending' - NodeVersionStatusFlagged = 'NodeVersionStatusFlagged' +class FunctionTool(BaseModel): + description: Optional[str] = Field( + None, description='Description of what the function does' + ) + name: str = Field(..., description='Name of the function') + parameters: Dict[str, Any] = Field( + ..., description='JSON Schema object describing the function parameters' + ) + type: Literal['FunctionTool'] = Field(..., description='The type of tool') -class PublisherStatus(str, Enum): - PublisherStatusActive = 'PublisherStatusActive' - PublisherStatusBanned = 'PublisherStatusBanned' +class Status2(str, Enum): + 
in_progress = 'in_progress' + completed = 'completed' + incomplete = 'incomplete' -class WorkflowRunStatus(str, Enum): - WorkflowRunStatusStarted = 'WorkflowRunStatusStarted' - WorkflowRunStatusFailed = 'WorkflowRunStatusFailed' - WorkflowRunStatusCompleted = 'WorkflowRunStatusCompleted' +class Type6(str, Enum): + function_call = 'function_call' -class MachineStats(BaseModel): - machine_name: Optional[str] = Field(None, description='Name of the machine.') - os_version: Optional[str] = Field( - None, description='The operating system version. eg. Ubuntu Linux 20.04' +class FunctionToolCall(BaseModel): + arguments: str = Field( + ..., description='A JSON string of the arguments to pass to the function.\n' + ) + call_id: str = Field( + ..., + description='The unique ID of the function tool call generated by the model.\n', + ) + id: Optional[str] = Field( + None, description='The unique ID of the function tool call.\n' + ) + name: str = Field(..., description='The name of the function to run.\n') + status: Optional[Status2] = Field( + None, + description='The status of the item. One of `in_progress`, `completed`, or\n`incomplete`. Populated when items are returned via API.\n', ) - gpu_type: Optional[str] = Field( - None, description='The GPU type. eg. NVIDIA Tesla K80' + type: Type6 = Field( + ..., description='The type of the function tool call. Always `function_call`.\n' ) - cpu_capacity: Optional[str] = Field(None, description='Total CPU on the machine.') - initial_cpu: Optional[str] = Field( - None, description='Initial CPU available before the job starts.' 
+ + +class GeminiCitation(BaseModel): + authors: Optional[List[str]] = None + endIndex: Optional[int] = None + license: Optional[str] = None + publicationDate: Optional[date] = None + startIndex: Optional[int] = None + title: Optional[str] = None + uri: Optional[str] = None + + +class GeminiCitationMetadata(BaseModel): + citations: Optional[List[GeminiCitation]] = None + + +class Role1(str, Enum): + user = 'user' + model = 'model' + + +class GeminiFunctionDeclaration(BaseModel): + description: Optional[str] = None + name: str + parameters: Dict[str, Any] = Field( + ..., description='JSON schema for the function parameters' ) - memory_capacity: Optional[str] = Field( - None, description='Total memory on the machine.' + + +class GeminiGenerationConfig(BaseModel): + maxOutputTokens: Optional[int] = Field( + None, + description='Maximum number of tokens that can be generated in the response. A token is approximately 4 characters. 100 tokens correspond to roughly 60-80 words.\n', + examples=[2048], + ge=16, + le=8192, ) - initial_ram: Optional[str] = Field( - None, description='Initial RAM available before the job starts.' + seed: Optional[int] = Field( + None, + description="When seed is fixed to a specific value, the model makes a best effort to provide the same response for repeated requests. Deterministic output isn't guaranteed. Also, changing the model or parameter settings, such as the temperature, can cause variations in the response even when you use the same seed value. By default, a random seed value is used. Available for the following models:, gemini-2.5-flash-preview-04-1, gemini-2.5-pro-preview-05-0, gemini-2.0-flash-lite-00, gemini-2.0-flash-001\n", + examples=[343940597], ) - vram_time_series: Optional[Dict[str, Any]] = Field( - None, description='Time series of VRAM usage.' 
+ stopSequences: Optional[List[str]] = None + temperature: Optional[float] = Field( + 1, + description="The temperature is used for sampling during response generation, which occurs when topP and topK are applied. Temperature controls the degree of randomness in token selection. Lower temperatures are good for prompts that require a less open-ended or creative response, while higher temperatures can lead to more diverse or creative results. A temperature of 0 means that the highest probability tokens are always selected. In this case, responses for a given prompt are mostly deterministic, but a small amount of variation is still possible. If the model returns a response that's too generic, too short, or the model gives a fallback response, try increasing the temperature\n", + ge=0.0, + le=2.0, ) - disk_capacity: Optional[str] = Field( - None, description='Total disk capacity on the machine.' + topK: Optional[int] = Field( + 40, + description="Top-K changes how the model selects tokens for output. A top-K of 1 means the next selected token is the most probable among all tokens in the model's vocabulary. A top-K of 3 means that the next token is selected from among the 3 most probable tokens by using temperature.\n", + examples=[40], + ge=1, ) - initial_disk: Optional[str] = Field( - None, description='Initial disk available before the job starts.' + topP: Optional[float] = Field( + 0.95, + description='If specified, nucleus sampling is used.\nTop-P changes how the model selects tokens for output. Tokens are selected from the most (see top-K) to least probable until the sum of their probabilities equals the top-P value. 
For example, if tokens A, B, and C have a probability of 0.3, 0.2, and 0.1 and the top-P value is 0.5, then the model will select either A or B as the next token by using temperature and excludes C as a candidate.\nSpecify a lower value for less random responses and a higher value for more random responses.\n', + ge=0.0, + le=1.0, ) - pip_freeze: Optional[str] = Field(None, description='The pip freeze output') -class Customer(BaseModel): - id: str = Field(..., description='The firebase UID of the user') - email: Optional[str] = Field(None, description='The email address for this user') - name: Optional[str] = Field(None, description='The name for this user') - createdAt: Optional[datetime] = Field( - None, description='The date and time the user was created' +class GeminiMimeType(str, Enum): + application_pdf = 'application/pdf' + audio_mpeg = 'audio/mpeg' + audio_mp3 = 'audio/mp3' + audio_wav = 'audio/wav' + image_png = 'image/png' + image_jpeg = 'image/jpeg' + image_webp = 'image/webp' + text_plain = 'text/plain' + video_mov = 'video/mov' + video_mpeg = 'video/mpeg' + video_mp4 = 'video/mp4' + video_mpg = 'video/mpg' + video_avi = 'video/avi' + video_wmv = 'video/wmv' + video_mpegps = 'video/mpegps' + video_flv = 'video/flv' + + +class GeminiOffset(BaseModel): + nanos: Optional[int] = Field( + None, + description='Signed fractions of a second at nanosecond resolution. Negative second values with fractions must still have non-negative nanos values.\n', + examples=[0], + ge=0, + le=999999999, ) - updatedAt: Optional[datetime] = Field( - None, description='The date and time the user was last updated' + seconds: Optional[int] = Field( + None, + description='Signed seconds of the span of time. 
Must be from -315,576,000,000 to +315,576,000,000 inclusive.\n', + examples=[60], + ge=-315576000000, + le=315576000000, ) -class MagicPrompt(str, Enum): - ON = 'ON' +class GeminiSafetyCategory(str, Enum): + HARM_CATEGORY_SEXUALLY_EXPLICIT = 'HARM_CATEGORY_SEXUALLY_EXPLICIT' + HARM_CATEGORY_HATE_SPEECH = 'HARM_CATEGORY_HATE_SPEECH' + HARM_CATEGORY_HARASSMENT = 'HARM_CATEGORY_HARASSMENT' + HARM_CATEGORY_DANGEROUS_CONTENT = 'HARM_CATEGORY_DANGEROUS_CONTENT' + + +class Probability(str, Enum): + NEGLIGIBLE = 'NEGLIGIBLE' + LOW = 'LOW' + MEDIUM = 'MEDIUM' + HIGH = 'HIGH' + UNKNOWN = 'UNKNOWN' + + +class GeminiSafetyRating(BaseModel): + category: Optional[GeminiSafetyCategory] = None + probability: Optional[Probability] = Field( + None, + description='The probability that the content violates the specified safety category', + ) + + +class GeminiSafetyThreshold(str, Enum): OFF = 'OFF' + BLOCK_NONE = 'BLOCK_NONE' + BLOCK_LOW_AND_ABOVE = 'BLOCK_LOW_AND_ABOVE' + BLOCK_MEDIUM_AND_ABOVE = 'BLOCK_MEDIUM_AND_ABOVE' + BLOCK_ONLY_HIGH = 'BLOCK_ONLY_HIGH' -class ColorPalette(BaseModel): - name: str = Field(..., description='Name of the color palette', examples=['PASTEL']) +class GeminiTextPart(BaseModel): + text: Optional[str] = Field( + None, + description='A text prompt or code snippet.', + examples=['Answer as concisely as possible'], + ) -class StyleCode(RootModel[str]): - root: str = Field(..., pattern='^[0-9A-Fa-f]{8}$') +class GeminiTool(BaseModel): + functionDeclarations: Optional[List[GeminiFunctionDeclaration]] = None -class StyleType(str, Enum): - GENERAL = 'GENERAL' +class GeminiVideoMetadata(BaseModel): + endOffset: Optional[GeminiOffset] = None + startOffset: Optional[GeminiOffset] = None class IdeogramColorPalette1(BaseModel): @@ -262,27 +471,17 @@ class IdeogramColorPalette( class ImageRequest(BaseModel): - prompt: str = Field( - ..., description='Required. The prompt to use to generate the image.' 
- ) aspect_ratio: Optional[str] = Field( None, description="Optional. The aspect ratio (e.g., 'ASPECT_16_9', 'ASPECT_1_1'). Cannot be used with resolution. Defaults to 'ASPECT_1_1' if unspecified.", ) - model: str = Field(..., description="The model used (e.g., 'V_2', 'V_2A_TURBO')") + color_palette: Optional[Dict[str, Any]] = Field( + None, description='Optional. Color palette object. Only for V_2, V_2_TURBO.' + ) magic_prompt_option: Optional[str] = Field( None, description="Optional. MagicPrompt usage ('AUTO', 'ON', 'OFF')." ) - seed: Optional[int] = Field( - None, - description='Optional. A number between 0 and 2147483647.', - ge=0, - le=2147483647, - ) - style_type: Optional[str] = Field( - None, - description="Optional. Style type ('AUTO', 'GENERAL', 'REALISTIC', 'DESIGN', 'RENDER_3D', 'ANIME'). Only for models V_2 and above.", - ) + model: str = Field(..., description="The model used (e.g., 'V_2', 'V_2A_TURBO')") negative_prompt: Optional[str] = Field( None, description='Optional. Description of what to exclude. Only for V_1, V_1_TURBO, V_2, V_2_TURBO.', @@ -293,12 +492,22 @@ class ImageRequest(BaseModel): ge=1, le=8, ) + prompt: str = Field( + ..., description='Required. The prompt to use to generate the image.' + ) resolution: Optional[str] = Field( None, description="Optional. Resolution (e.g., 'RESOLUTION_1024_1024'). Only for model V_2. Cannot be used with aspect_ratio.", ) - color_palette: Optional[Dict[str, Any]] = Field( - None, description='Optional. Color palette object. Only for V_2, V_2_TURBO.' + seed: Optional[int] = Field( + None, + description='Optional. A number between 0 and 2147483647.', + ge=0, + le=2147483647, + ) + style_type: Optional[str] = Field( + None, + description="Optional. Style type ('AUTO', 'GENERAL', 'REALISTIC', 'DESIGN', 'RENDER_3D', 'ANIME'). 
Only for models V_2 and above.", ) @@ -309,23 +518,23 @@ class IdeogramGenerateRequest(BaseModel): class Datum(BaseModel): + is_image_safe: Optional[bool] = Field( + None, description='Indicates whether the image is considered safe.' + ) prompt: Optional[str] = Field( None, description='The prompt used to generate this image.' ) resolution: Optional[str] = Field( None, description="The resolution of the generated image (e.g., '1024x1024')." ) - is_image_safe: Optional[bool] = Field( - None, description='Indicates whether the image is considered safe.' - ) seed: Optional[int] = Field( None, description='The seed value used for this generation.' ) - url: Optional[str] = Field(None, description='URL to the generated image.') style_type: Optional[str] = Field( None, description="The style type used for generation (e.g., 'REALISTIC', 'ANIME').", ) + url: Optional[str] = Field(None, description='URL to the generated image.') class IdeogramGenerateResponse(BaseModel): @@ -337,19 +546,48 @@ class IdeogramGenerateResponse(BaseModel): ) +class StyleCode(RootModel[str]): + root: str = Field(..., pattern='^[0-9A-Fa-f]{8}$') + + +class Datum1(BaseModel): + is_image_safe: Optional[bool] = None + prompt: Optional[str] = None + resolution: Optional[str] = None + seed: Optional[int] = None + style_type: Optional[str] = None + url: Optional[str] = None + + +class IdeogramV3IdeogramResponse(BaseModel): + created: Optional[datetime] = None + data: Optional[List[Datum1]] = None + + class RenderingSpeed1(str, Enum): TURBO = 'TURBO' DEFAULT = 'DEFAULT' QUALITY = 'QUALITY' -class MagicPrompt1(str, Enum): - AUTO = 'AUTO' +class IdeogramV3ReframeRequest(BaseModel): + color_palette: Optional[Dict[str, Any]] = None + image: Optional[StrictBytes] = None + num_images: Optional[int] = Field(None, ge=1, le=8) + rendering_speed: Optional[RenderingSpeed1] = None + resolution: str + seed: Optional[int] = Field(None, ge=0, le=2147483647) + style_codes: Optional[List[str]] = None + 
style_reference_images: Optional[List[StrictBytes]] = None + + +class MagicPrompt(str, Enum): + AUTO = 'AUTO' ON = 'ON' OFF = 'OFF' -class StyleType1(str, Enum): +class StyleType(str, Enum): AUTO = 'AUTO' GENERAL = 'GENERAL' REALISTIC = 'REALISTIC' @@ -357,104 +595,170 @@ class StyleType1(str, Enum): class IdeogramV3RemixRequest(BaseModel): + aspect_ratio: Optional[str] = None + color_palette: Optional[Dict[str, Any]] = None image: Optional[StrictBytes] = None - prompt: str image_weight: Optional[int] = Field(50, ge=1, le=100) - seed: Optional[int] = Field(None, ge=0, le=2147483647) - resolution: Optional[str] = None - aspect_ratio: Optional[str] = None - rendering_speed: Optional[RenderingSpeed1] = None - magic_prompt: Optional[MagicPrompt1] = None + magic_prompt: Optional[MagicPrompt] = None negative_prompt: Optional[str] = None num_images: Optional[int] = Field(None, ge=1, le=8) - color_palette: Optional[Dict[str, Any]] = None - style_codes: Optional[List[str]] = None - style_type: Optional[StyleType1] = None - style_reference_images: Optional[List[StrictBytes]] = None - - -class Datum1(BaseModel): - prompt: Optional[str] = None + prompt: str + rendering_speed: Optional[RenderingSpeed1] = None resolution: Optional[str] = None - is_image_safe: Optional[bool] = None - seed: Optional[int] = None - url: Optional[str] = None - style_type: Optional[str] = None - - -class IdeogramV3IdeogramResponse(BaseModel): - created: Optional[datetime] = None - data: Optional[List[Datum1]] = None - - -class IdeogramV3ReframeRequest(BaseModel): - image: Optional[StrictBytes] = None - resolution: str - num_images: Optional[int] = Field(None, ge=1, le=8) seed: Optional[int] = Field(None, ge=0, le=2147483647) - rendering_speed: Optional[RenderingSpeed1] = None - color_palette: Optional[Dict[str, Any]] = None style_codes: Optional[List[str]] = None style_reference_images: Optional[List[StrictBytes]] = None + style_type: Optional[StyleType] = None class 
IdeogramV3ReplaceBackgroundRequest(BaseModel): + color_palette: Optional[Dict[str, Any]] = None image: Optional[StrictBytes] = None - prompt: str - magic_prompt: Optional[MagicPrompt1] = None + magic_prompt: Optional[MagicPrompt] = None num_images: Optional[int] = Field(None, ge=1, le=8) - seed: Optional[int] = Field(None, ge=0, le=2147483647) + prompt: str rendering_speed: Optional[RenderingSpeed1] = None - color_palette: Optional[Dict[str, Any]] = None + seed: Optional[int] = Field(None, ge=0, le=2147483647) style_codes: Optional[List[str]] = None style_reference_images: Optional[List[StrictBytes]] = None -class KlingTaskStatus(str, Enum): - submitted = 'submitted' - processing = 'processing' - succeed = 'succeed' - failed = 'failed' +class ColorPalette(BaseModel): + name: str = Field(..., description='Name of the color palette', examples=['PASTEL']) -class KlingVideoGenModelName(str, Enum): - kling_v1 = 'kling-v1' - kling_v1_5 = 'kling-v1-5' - kling_v1_6 = 'kling-v1-6' - kling_v2_master = 'kling-v2-master' +class MagicPrompt2(str, Enum): + ON = 'ON' + OFF = 'OFF' -class KlingVideoGenMode(str, Enum): - std = 'std' - pro = 'pro' +class StyleType1(str, Enum): + GENERAL = 'GENERAL' -class KlingVideoGenAspectRatio(str, Enum): - field_16_9 = '16:9' - field_9_16 = '9:16' +class ImagenImageGenerationInstance(BaseModel): + prompt: str = Field(..., description='Text prompt for image generation') + + +class AspectRatio(str, Enum): field_1_1 = '1:1' + field_9_16 = '9:16' + field_16_9 = '16:9' + field_3_4 = '3:4' + field_4_3 = '4:3' -class KlingVideoGenDuration(str, Enum): - field_5 = '5' - field_10 = '10' +class PersonGeneration(str, Enum): + dont_allow = 'dont_allow' + allow_adult = 'allow_adult' + allow_all = 'allow_all' -class KlingVideoGenCfgScale(RootModel[float]): - root: float = Field( +class SafetySetting(str, Enum): + block_most = 'block_most' + block_some = 'block_some' + block_few = 'block_few' + block_fewest = 'block_fewest' + + +class 
ImagenImagePrediction(BaseModel): + bytesBase64Encoded: Optional[str] = Field( + None, description='Base64-encoded image content' + ) + mimeType: Optional[str] = Field( + None, description='MIME type of the generated image' + ) + prompt: Optional[str] = Field( + None, description='Enhanced or rewritten prompt used to generate this image' + ) + + +class MimeType(str, Enum): + image_png = 'image/png' + image_jpeg = 'image/jpeg' + + +class ImagenOutputOptions(BaseModel): + compressionQuality: Optional[int] = Field(None, ge=0, le=100) + mimeType: Optional[MimeType] = None + + +class Includable(str, Enum): + file_search_call_results = 'file_search_call.results' + message_input_image_image_url = 'message.input_image.image_url' + computer_call_output_output_image_url = 'computer_call_output.output.image_url' + + +class Type7(str, Enum): + input_file = 'input_file' + + +class InputFileContent(BaseModel): + file_data: Optional[str] = Field( + None, description='The content of the file to be sent to the model.\n' + ) + file_id: Optional[str] = Field( + None, description='The ID of the file to be sent to the model.' + ) + filename: Optional[str] = Field( + None, description='The name of the file to be sent to the model.' + ) + type: Type7 = Field( + ..., description='The type of the input item. Always `input_file`.' + ) + + +class Detail(str, Enum): + low = 'low' + high = 'high' + auto = 'auto' + + +class Type8(str, Enum): + input_image = 'input_image' + + +class InputImageContent(BaseModel): + detail: Detail = Field( ..., - description="Flexibility in video generation. The higher the value, the lower the model's degree of flexibility, and the stronger the relevance to the user's prompt.", - ge=0.0, - le=1.0, + description='The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`. Defaults to `auto`.', + ) + file_id: Optional[str] = Field( + None, description='The ID of the file to be sent to the model.' 
+ ) + image_url: Optional[str] = Field( + None, + description='The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image in a data URL.', + ) + type: Type8 = Field( + ..., description='The type of the input item. Always `input_image`.' ) -class KlingCameraControlType(str, Enum): - simple = 'simple' - down_back = 'down_back' - forward_up = 'forward_up' - right_turn_forward = 'right_turn_forward' - left_turn_forward = 'left_turn_forward' +class Role3(str, Enum): + user = 'user' + system = 'system' + developer = 'developer' + + +class Type9(str, Enum): + message = 'message' + + +class Type10(str, Enum): + input_text = 'input_text' + + +class InputTextContent(BaseModel): + text: str = Field(..., description='The text input to the model.') + type: Type10 = Field( + ..., description='The type of the input item. Always `input_text`.' + ) + + +class KlingAudioUploadType(str, Enum): + file = 'file' + url = 'url' class KlingCameraConfig(BaseModel): @@ -464,15 +768,15 @@ class KlingCameraConfig(BaseModel): ge=-10.0, le=10.0, ) - vertical: Optional[float] = Field( + pan: Optional[float] = Field( None, - description="Controls camera's movement along vertical axis (y-axis). Negative indicates downward, positive indicates upward.", + description="Controls camera's rotation in vertical plane (x-axis). Negative indicates downward rotation, positive indicates upward rotation.", ge=-10.0, le=10.0, ) - pan: Optional[float] = Field( + roll: Optional[float] = Field( None, - description="Controls camera's rotation in vertical plane (x-axis). Negative indicates downward rotation, positive indicates upward rotation.", + description="Controls camera's rolling amount (z-axis). 
Negative indicates counterclockwise, positive indicates clockwise.", ge=-10.0, le=10.0, ) @@ -482,9 +786,9 @@ class KlingCameraConfig(BaseModel): ge=-10.0, le=10.0, ) - roll: Optional[float] = Field( + vertical: Optional[float] = Field( None, - description="Controls camera's rolling amount (z-axis). Negative indicates counterclockwise, positive indicates clockwise.", + description="Controls camera's movement along vertical axis (y-axis). Negative indicates downward, positive indicates upward.", ge=-10.0, le=10.0, ) @@ -496,25 +800,18 @@ class KlingCameraConfig(BaseModel): ) -class KlingVideoResult(BaseModel): - id: Optional[str] = Field(None, description='Generated video ID') - url: Optional[AnyUrl] = Field(None, description='URL for generated video') - duration: Optional[str] = Field(None, description='Total video duration') - - -class KlingAudioUploadType(str, Enum): - file = 'file' - url = 'url' - - -class KlingLipSyncMode(str, Enum): - text2video = 'text2video' - audio2video = 'audio2video' +class KlingCameraControlType(str, Enum): + simple = 'simple' + down_back = 'down_back' + forward_up = 'forward_up' + right_turn_forward = 'right_turn_forward' + left_turn_forward = 'left_turn_forward' -class KlingLipSyncVoiceLanguage(str, Enum): - zh = 'zh' - en = 'en' +class KlingCharacterEffectModelName(str, Enum): + kling_v1 = 'kling-v1' + kling_v1_5 = 'kling-v1-5' + kling_v1_6 = 'kling-v1-6' class KlingDualCharacterEffectsScene(str, Enum): @@ -523,30 +820,42 @@ class KlingDualCharacterEffectsScene(str, Enum): heart_gesture = 'heart_gesture' -class KlingSingleImageEffectsScene(str, Enum): - bloombloom = 'bloombloom' - dizzydizzy = 'dizzydizzy' - fuzzyfuzzy = 'fuzzyfuzzy' - squish = 'squish' - expansion = 'expansion' +class KlingDualCharacterImages(RootModel[List[str]]): + root: List[str] = Field(..., max_length=2, min_length=2) -class KlingCharacterEffectModelName(str, Enum): - kling_v1 = 'kling-v1' - kling_v1_5 = 'kling-v1-5' - kling_v1_6 = 'kling-v1-6' +class 
KlingErrorResponse(BaseModel): + code: int = Field( + ..., + description='- 1000: Authentication failed\n- 1001: Authorization is empty\n- 1002: Authorization is invalid\n- 1003: Authorization is not yet valid\n- 1004: Authorization has expired\n- 1100: Account exception\n- 1101: Account in arrears (postpaid scenario)\n- 1102: Resource pack depleted or expired (prepaid scenario)\n- 1103: Unauthorized access to requested resource\n- 1200: Invalid request parameters\n- 1201: Invalid parameters\n- 1202: Invalid request method\n- 1203: Requested resource does not exist\n- 1300: Trigger platform strategy\n- 1301: Trigger content security policy\n- 1302: API request too frequent\n- 1303: Concurrency/QPS exceeds limit\n- 1304: Trigger IP whitelist policy\n- 5000: Internal server error\n- 5001: Service temporarily unavailable\n- 5002: Server internal timeout\n', + ) + message: str = Field(..., description='Human-readable error message') + request_id: str = Field( + ..., description='Request ID for tracking and troubleshooting' + ) -class KlingSingleImageEffectModelName(str, Enum): - kling_v1_6 = 'kling-v1-6' +class Trajectory(BaseModel): + x: Optional[int] = Field( + None, + description='The horizontal coordinate of trajectory point. Based on bottom-left corner of image as origin (0,0).', + ) + y: Optional[int] = Field( + None, + description='The vertical coordinate of trajectory point. Based on bottom-left corner of image as origin (0,0).', + ) -class KlingSingleImageEffectDuration(str, Enum): - field_5 = '5' +class DynamicMask(BaseModel): + mask: Optional[AnyUrl] = Field( + None, + description='Dynamic Brush Application Area (Mask image created by users using the motion brush). 
The aspect ratio must match the input image.', + ) + trajectories: Optional[List[Trajectory]] = None -class KlingDualCharacterImages(RootModel[List[str]]): - root: List[str] = Field(..., max_length=2, min_length=2) +class TaskInfo(BaseModel): + external_task_id: Optional[str] = None class KlingImageGenAspectRatio(str, Enum): @@ -571,1432 +880,484 @@ class KlingImageGenModelName(str, Enum): kling_v2 = 'kling-v2' +class KlingImageGenerationsRequest(BaseModel): + aspect_ratio: Optional[KlingImageGenAspectRatio] = '16:9' + callback_url: Optional[AnyUrl] = Field( + None, description='The callback notification address' + ) + human_fidelity: Optional[float] = Field( + 0.45, description='Subject reference similarity', ge=0.0, le=1.0 + ) + image: Optional[str] = Field( + None, description='Reference Image - Base64 encoded string or image URL' + ) + image_fidelity: Optional[float] = Field( + 0.5, description='Reference intensity for user-uploaded images', ge=0.0, le=1.0 + ) + image_reference: Optional[KlingImageGenImageReferenceType] = None + model_name: Optional[KlingImageGenModelName] = 'kling-v1' + n: Optional[int] = Field(1, description='Number of generated images', ge=1, le=9) + negative_prompt: Optional[str] = Field( + None, description='Negative text prompt', max_length=200 + ) + prompt: str = Field(..., description='Positive text prompt', max_length=500) + + class KlingImageResult(BaseModel): index: Optional[int] = Field(None, description='Image Number (0-9)') url: Optional[AnyUrl] = Field(None, description='URL for generated image') -class KlingVirtualTryOnModelName(str, Enum): - kolors_virtual_try_on_v1 = 'kolors-virtual-try-on-v1' - kolors_virtual_try_on_v1_5 = 'kolors-virtual-try-on-v1-5' - - -class TaskInfo(BaseModel): - external_task_id: Optional[str] = None - - -class TaskResult(BaseModel): - videos: Optional[List[KlingVideoResult]] = None - - -class Data(BaseModel): - task_id: Optional[str] = Field(None, description='Task ID') - task_status: 
Optional[KlingTaskStatus] = None - task_info: Optional[TaskInfo] = None - created_at: Optional[int] = Field(None, description='Task creation time') - updated_at: Optional[int] = Field(None, description='Task update time') - task_result: Optional[TaskResult] = None - - -class KlingText2VideoResponse(BaseModel): - code: Optional[int] = Field(None, description='Error code') - message: Optional[str] = Field(None, description='Error message') - request_id: Optional[str] = Field(None, description='Request ID') - data: Optional[Data] = None - - -class Trajectory(BaseModel): - x: Optional[int] = Field( - None, - description='The horizontal coordinate of trajectory point. Based on bottom-left corner of image as origin (0,0).', - ) - y: Optional[int] = Field( - None, - description='The vertical coordinate of trajectory point. Based on bottom-left corner of image as origin (0,0).', - ) - - -class DynamicMask(BaseModel): - mask: Optional[AnyUrl] = Field( - None, - description='Dynamic Brush Application Area (Mask image created by users using the motion brush). The aspect ratio must match the input image.', - ) - trajectories: Optional[List[Trajectory]] = None - - -class Data1(BaseModel): - task_id: Optional[str] = Field(None, description='Task ID') - task_status: Optional[KlingTaskStatus] = None - task_info: Optional[TaskInfo] = None - created_at: Optional[int] = Field(None, description='Task creation time') - updated_at: Optional[int] = Field(None, description='Task update time') - task_result: Optional[TaskResult] = None - - -class KlingImage2VideoResponse(BaseModel): - code: Optional[int] = Field(None, description='Error code') - message: Optional[str] = Field(None, description='Error message') - request_id: Optional[str] = Field(None, description='Request ID') - data: Optional[Data1] = None - - -class KlingVideoExtendRequest(BaseModel): - video_id: Optional[str] = Field( - None, - description='The ID of the video to be extended. 
Supports videos generated by text-to-video, image-to-video, and previous video extension operations. Cannot exceed 3 minutes total duration after extension.', - ) - prompt: Optional[str] = Field( - None, - description='Positive text prompt for guiding the video extension', - max_length=2500, - ) - negative_prompt: Optional[str] = Field( - None, - description='Negative text prompt for elements to avoid in the extended video', - max_length=2500, - ) - cfg_scale: Optional[KlingVideoGenCfgScale] = Field( - default_factory=lambda: KlingVideoGenCfgScale.model_validate(0.5) - ) - callback_url: Optional[AnyUrl] = Field( - None, - description='The callback notification address. Server will notify when the task status changes.', - ) - - -class Data2(BaseModel): - task_id: Optional[str] = Field(None, description='Task ID') - task_status: Optional[KlingTaskStatus] = None - task_info: Optional[TaskInfo] = None - created_at: Optional[int] = Field(None, description='Task creation time') - updated_at: Optional[int] = Field(None, description='Task update time') - task_result: Optional[TaskResult] = None - - -class KlingVideoExtendResponse(BaseModel): - code: Optional[int] = Field(None, description='Error code') - message: Optional[str] = Field(None, description='Error message') - request_id: Optional[str] = Field(None, description='Request ID') - data: Optional[Data2] = None - - -class KlingLipSyncInputObject(BaseModel): - video_id: Optional[str] = Field( - None, - description='The ID of the video generated by Kling AI. Only supports 5-second and 10-second videos generated within the last 30 days.', - ) - video_url: Optional[str] = Field( - None, - description='Get link for uploaded video. Video files support .mp4/.mov, file size does not exceed 100MB, video length between 2-10s.', - ) - mode: KlingLipSyncMode - text: Optional[str] = Field( - None, - description='Text Content for Lip-Sync Video Generation. Required when mode is text2video. 
Maximum length is 120 characters.', - ) - voice_id: Optional[str] = Field( - None, - description='Voice ID. Required when mode is text2video. The system offers a variety of voice options to choose from.', - ) - voice_language: Optional[KlingLipSyncVoiceLanguage] = 'en' - voice_speed: Optional[float] = Field( - 1, - description='Speech Rate. Valid range: 0.8~2.0, accurate to one decimal place.', - ge=0.8, - le=2.0, - ) - audio_type: Optional[KlingAudioUploadType] = None - audio_file: Optional[str] = Field( - None, - description='Local Path of Audio File. Supported formats: .mp3/.wav/.m4a/.aac, maximum file size of 5MB. Base64 code.', - ) - audio_url: Optional[str] = Field( - None, - description='Audio File Download URL. Supported formats: .mp3/.wav/.m4a/.aac, maximum file size of 5MB.', - ) - - -class KlingLipSyncRequest(BaseModel): - input: KlingLipSyncInputObject - callback_url: Optional[AnyUrl] = Field( - None, - description='The callback notification address. Server will notify when the task status changes.', - ) - - -class Data3(BaseModel): - task_id: Optional[str] = Field(None, description='Task ID') - task_status: Optional[KlingTaskStatus] = None - task_info: Optional[TaskInfo] = None - created_at: Optional[int] = Field(None, description='Task creation time') - updated_at: Optional[int] = Field(None, description='Task update time') - task_result: Optional[TaskResult] = None - - -class KlingLipSyncResponse(BaseModel): - code: Optional[int] = Field(None, description='Error code') - message: Optional[str] = Field(None, description='Error message') - request_id: Optional[str] = Field(None, description='Request ID') - data: Optional[Data3] = None - - -class KlingSingleImageEffectInput(BaseModel): - model_name: KlingSingleImageEffectModelName - image: str = Field( - ..., - description='Reference Image. URL or Base64 encoded string (without data:image prefix). 
File size cannot exceed 10MB, resolution not less than 300*300px, aspect ratio between 1:2.5 ~ 2.5:1.', - ) - duration: KlingSingleImageEffectDuration - - -class KlingDualCharacterEffectInput(BaseModel): - model_name: Optional[KlingCharacterEffectModelName] = 'kling-v1' - mode: Optional[KlingVideoGenMode] = 'std' - images: KlingDualCharacterImages - duration: KlingVideoGenDuration - - -class Data4(BaseModel): - task_id: Optional[str] = Field(None, description='Task ID') - task_status: Optional[KlingTaskStatus] = None - task_info: Optional[TaskInfo] = None - created_at: Optional[int] = Field(None, description='Task creation time') - updated_at: Optional[int] = Field(None, description='Task update time') - task_result: Optional[TaskResult] = None - - -class KlingVideoEffectsResponse(BaseModel): - code: Optional[int] = Field(None, description='Error code') - message: Optional[str] = Field(None, description='Error message') - request_id: Optional[str] = Field(None, description='Request ID') - data: Optional[Data4] = None - - -class KlingImageGenerationsRequest(BaseModel): - model_name: Optional[KlingImageGenModelName] = 'kling-v1' - prompt: str = Field(..., description='Positive text prompt', max_length=500) - negative_prompt: Optional[str] = Field( - None, description='Negative text prompt', max_length=200 - ) - image: Optional[str] = Field( - None, description='Reference Image - Base64 encoded string or image URL' - ) - image_reference: Optional[KlingImageGenImageReferenceType] = None - image_fidelity: Optional[float] = Field( - 0.5, description='Reference intensity for user-uploaded images', ge=0.0, le=1.0 - ) - human_fidelity: Optional[float] = Field( - 0.45, description='Subject reference similarity', ge=0.0, le=1.0 - ) - n: Optional[int] = Field(1, description='Number of generated images', ge=1, le=9) - aspect_ratio: Optional[KlingImageGenAspectRatio] = '16:9' - callback_url: Optional[AnyUrl] = Field( - None, description='The callback notification address' - ) - 
- -class TaskResult5(BaseModel): - images: Optional[List[KlingImageResult]] = None - - -class Data5(BaseModel): - task_id: Optional[str] = Field(None, description='Task ID') - task_status: Optional[KlingTaskStatus] = None - task_status_msg: Optional[str] = Field(None, description='Task status information') - created_at: Optional[int] = Field(None, description='Task creation time') - updated_at: Optional[int] = Field(None, description='Task update time') - task_result: Optional[TaskResult5] = None - - -class KlingImageGenerationsResponse(BaseModel): - code: Optional[int] = Field(None, description='Error code') - message: Optional[str] = Field(None, description='Error message') - request_id: Optional[str] = Field(None, description='Request ID') - data: Optional[Data5] = None - - -class KlingVirtualTryOnRequest(BaseModel): - model_name: Optional[KlingVirtualTryOnModelName] = 'kolors-virtual-try-on-v1' - human_image: str = Field( - ..., description='Reference human image - Base64 encoded string or image URL' - ) - cloth_image: Optional[str] = Field( - None, - description='Reference clothing image - Base64 encoded string or image URL', - ) - callback_url: Optional[AnyUrl] = Field( - None, description='The callback notification address' - ) - - -class Data6(BaseModel): - task_id: Optional[str] = Field(None, description='Task ID') - task_status: Optional[KlingTaskStatus] = None - task_status_msg: Optional[str] = Field(None, description='Task status information') - created_at: Optional[int] = Field(None, description='Task creation time') - updated_at: Optional[int] = Field(None, description='Task update time') - task_result: Optional[TaskResult5] = None - - -class KlingVirtualTryOnResponse(BaseModel): - code: Optional[int] = Field(None, description='Error code') - message: Optional[str] = Field(None, description='Error message') - request_id: Optional[str] = Field(None, description='Request ID') - data: Optional[Data6] = None - - -class ResourcePackType(str, Enum): - 
decreasing_total = 'decreasing_total' - constant_period = 'constant_period' - - -class Status(str, Enum): - toBeOnline = 'toBeOnline' - online = 'online' - expired = 'expired' - runOut = 'runOut' - - -class ResourcePackSubscribeInfo(BaseModel): - resource_pack_name: Optional[str] = Field(None, description='Resource package name') - resource_pack_id: Optional[str] = Field(None, description='Resource package ID') - resource_pack_type: Optional[ResourcePackType] = Field( - None, - description='Resource package type (decreasing_total=decreasing total, constant_period=constant periodicity)', - ) - total_quantity: Optional[float] = Field(None, description='Total quantity') - remaining_quantity: Optional[float] = Field( - None, description='Remaining quantity (updated with a 12-hour delay)' - ) - purchase_time: Optional[int] = Field( - None, description='Purchase time, Unix timestamp in ms' - ) - effective_time: Optional[int] = Field( - None, description='Effective time, Unix timestamp in ms' - ) - invalid_time: Optional[int] = Field( - None, description='Expiration time, Unix timestamp in ms' - ) - status: Optional[Status] = Field(None, description='Resource Package Status') - - -class Data7(BaseModel): - code: Optional[int] = Field(None, description='Error code; 0 indicates success') - msg: Optional[str] = Field(None, description='Error information') - resource_pack_subscribe_infos: Optional[List[ResourcePackSubscribeInfo]] = Field( - None, description='Resource package list' - ) - - -class KlingResourcePackageResponse(BaseModel): - code: Optional[int] = Field(None, description='Error code; 0 indicates success') - message: Optional[str] = Field(None, description='Error information') - request_id: Optional[str] = Field( - None, - description='Request ID, generated by the system, used to track requests and troubleshoot problems', - ) - data: Optional[Data7] = None - - -class Object(str, Enum): - event = 'event' - - -class Type(str, Enum): - payment_intent_succeeded = 
'payment_intent.succeeded' - - -class StripeRequestInfo(BaseModel): - id: Optional[str] = None - idempotency_key: Optional[str] = None - - -class Object1(str, Enum): - payment_intent = 'payment_intent' - - -class StripeAmountDetails(BaseModel): - tip: Optional[Dict[str, Any]] = None - - -class Object2(str, Enum): - charge = 'charge' - - -class StripeAddress(BaseModel): - city: Optional[str] = None - country: Optional[str] = None - line1: Optional[str] = None - line2: Optional[str] = None - postal_code: Optional[str] = None - state: Optional[str] = None - - -class StripeOutcome(BaseModel): - advice_code: Optional[Any] = None - network_advice_code: Optional[Any] = None - network_decline_code: Optional[Any] = None - network_status: Optional[str] = None - reason: Optional[Any] = None - risk_level: Optional[str] = None - risk_score: Optional[int] = None - seller_message: Optional[str] = None - type: Optional[str] = None - - -class Checks(BaseModel): - address_line1_check: Optional[Any] = None - address_postal_code_check: Optional[Any] = None - cvc_check: Optional[str] = None - - -class ExtendedAuthorization(BaseModel): - status: Optional[str] = None - - -class IncrementalAuthorization(BaseModel): - status: Optional[str] = None - - -class Multicapture(BaseModel): - status: Optional[str] = None - - -class NetworkToken(BaseModel): - used: Optional[bool] = None - - -class Overcapture(BaseModel): - maximum_amount_capturable: Optional[int] = None - status: Optional[str] = None - - -class StripeCardDetails(BaseModel): - amount_authorized: Optional[int] = None - authorization_code: Optional[Any] = None - brand: Optional[str] = None - checks: Optional[Checks] = None - country: Optional[str] = None - exp_month: Optional[int] = None - exp_year: Optional[int] = None - extended_authorization: Optional[ExtendedAuthorization] = None - fingerprint: Optional[str] = None - funding: Optional[str] = None - incremental_authorization: Optional[IncrementalAuthorization] = None - installments: 
Optional[Any] = None - last4: Optional[str] = None - mandate: Optional[Any] = None - multicapture: Optional[Multicapture] = None - network: Optional[str] = None - network_token: Optional[NetworkToken] = None - network_transaction_id: Optional[str] = None - overcapture: Optional[Overcapture] = None - regulated_status: Optional[str] = None - three_d_secure: Optional[Any] = None - wallet: Optional[Any] = None - - -class StripeRefundList(BaseModel): - object: Optional[str] = None - data: Optional[List[Dict[str, Any]]] = None - has_more: Optional[bool] = None - total_count: Optional[int] = None - url: Optional[str] = None - - -class Card(BaseModel): - installments: Optional[Any] = None - mandate_options: Optional[Any] = None - network: Optional[Any] = None - request_three_d_secure: Optional[str] = None - - -class StripePaymentMethodOptions(BaseModel): - card: Optional[Card] = None - - -class StripeShipping(BaseModel): - address: Optional[StripeAddress] = None - carrier: Optional[str] = None - name: Optional[str] = None - phone: Optional[str] = None - tracking_number: Optional[str] = None - - -class Model(str, Enum): - T2V_01_Director = 'T2V-01-Director' - I2V_01_Director = 'I2V-01-Director' - S2V_01 = 'S2V-01' - I2V_01 = 'I2V-01' - I2V_01_live = 'I2V-01-live' - T2V_01 = 'T2V-01' - - -class SubjectReferenceItem(BaseModel): - image: Optional[str] = Field( - None, description='URL or base64 encoding of the subject reference image.' - ) - mask: Optional[str] = Field( - None, - description='URL or base64 encoding of the mask for the subject reference image.', - ) - - -class MinimaxVideoGenerationRequest(BaseModel): - model: Model = Field( - ..., - description='Required. ID of model. Options: T2V-01-Director, I2V-01-Director, S2V-01, I2V-01, I2V-01-live, T2V-01', - ) - prompt: Optional[str] = Field( - None, - description='Description of the video. Should be less than 2000 characters. 
Supports camera movement instructions in [brackets].', - max_length=2000, - ) - prompt_optimizer: Optional[bool] = Field( - True, - description='If true (default), the model will automatically optimize the prompt. Set to false for more precise control.', - ) - first_frame_image: Optional[str] = Field( - None, - description='URL or base64 encoding of the first frame image. Required when model is I2V-01, I2V-01-Director, or I2V-01-live.', - ) - subject_reference: Optional[List[SubjectReferenceItem]] = Field( - None, - description='Only available when model is S2V-01. The model will generate a video based on the subject uploaded through this parameter.', - ) - callback_url: Optional[str] = Field( - None, - description='Optional. URL to receive real-time status updates about the video generation task.', - ) - - -class MinimaxBaseResponse(BaseModel): - status_code: int = Field( - ..., - description='Status code. 0 indicates success, other values indicate errors.', - ) - status_msg: str = Field( - ..., description='Specific error details or success message.' - ) - - -class MinimaxVideoGenerationResponse(BaseModel): - task_id: str = Field( - ..., description='The task ID for the asynchronous video generation task.' 
- ) - base_resp: MinimaxBaseResponse - - -class File(BaseModel): - file_id: Optional[int] = Field(None, description='Unique identifier for the file') - bytes: Optional[int] = Field(None, description='File size in bytes') - created_at: Optional[int] = Field( - None, description='Unix timestamp when the file was created, in seconds' - ) - filename: Optional[str] = Field(None, description='The name of the file') - purpose: Optional[str] = Field(None, description='The purpose of using the file') - download_url: Optional[str] = Field( - None, description='The URL to download the video' - ) - - -class MinimaxFileRetrieveResponse(BaseModel): - file: File - base_resp: MinimaxBaseResponse - - -class Status1(str, Enum): - Queueing = 'Queueing' - Preparing = 'Preparing' - Processing = 'Processing' - Success = 'Success' - Fail = 'Fail' - - -class MinimaxTaskResultResponse(BaseModel): - task_id: str = Field(..., description='The task ID being queried.') - status: Status1 = Field( - ..., - description="Task status: 'Queueing' (in queue), 'Preparing' (task is preparing), 'Processing' (generating), 'Success' (task completed successfully), or 'Fail' (task failed).", - ) - file_id: Optional[str] = Field( - None, - description='After the task status changes to Success, this field returns the file ID corresponding to the generated video.', - ) - base_resp: MinimaxBaseResponse - - -class OutputFormat(str, Enum): - jpeg = 'jpeg' - png = 'png' - - -class BFLFluxPro11GenerateRequest(BaseModel): - prompt: str = Field(..., description='The main text prompt for image generation') - image_prompt: Optional[str] = Field(None, description='Optional image prompt') - width: int = Field(..., description='Width of the generated image') - height: int = Field(..., description='Height of the generated image') - prompt_upsampling: Optional[bool] = Field( - None, description='Whether to use prompt upsampling' - ) - seed: Optional[int] = Field(None, description='Random seed for reproducibility') - 
safety_tolerance: Optional[int] = Field(None, description='Safety tolerance level') - output_format: Optional[OutputFormat] = Field( - None, description='Output image format' - ) - webhook_url: Optional[str] = Field( - None, description='Optional webhook URL for async processing' - ) - webhook_secret: Optional[str] = Field( - None, description='Optional webhook secret for async processing' - ) - - -class BFLFluxPro11GenerateResponse(BaseModel): - id: str = Field(..., description='Job ID for tracking') - polling_url: str = Field(..., description='URL to poll for results') - - -class BFLFluxProGenerateRequest(BaseModel): - prompt: str = Field(..., description='The text prompt for image generation.') - negative_prompt: Optional[str] = Field( - None, description='The negative prompt for image generation.' - ) - width: int = Field( - ..., description='The width of the image to generate.', ge=64, le=2048 - ) - height: int = Field( - ..., description='The height of the image to generate.', ge=64, le=2048 - ) - num_inference_steps: Optional[int] = Field( - None, description='The number of inference steps.', ge=1, le=100 - ) - guidance_scale: Optional[float] = Field( - None, description='The guidance scale for generation.', ge=1.0, le=20.0 - ) - seed: Optional[int] = Field(None, description='The seed value for reproducibility.') - num_images: Optional[int] = Field( - None, description='The number of images to generate.', ge=1, le=4 - ) - - -class BFLFluxProGenerateResponse(BaseModel): - id: str = Field(..., description='The unique identifier for the generation task.') - polling_url: str = Field(..., description='URL to poll for the generation result.') - - -class Steps(RootModel[int]): - root: int = Field( - ..., - description='Number of steps for the image generation process', - examples=[50], - ge=15, - le=50, - title='Steps', - ) - - -class Guidance(RootModel[float]): - root: float = Field( - ..., - description='Guidance strength for the image generation process', - 
ge=1.5, - le=100.0, - title='Guidance', - ) - - -class WebhookUrl(RootModel[AnyUrl]): - root: AnyUrl = Field( - ..., description='URL to receive webhook notifications', title='Webhook Url' - ) - - -class BFLAsyncResponse(BaseModel): - id: str = Field(..., title='Id') - polling_url: str = Field(..., title='Polling Url') - - -class BFLAsyncWebhookResponse(BaseModel): - id: str = Field(..., title='Id') - status: str = Field(..., title='Status') - webhook_url: str = Field(..., title='Webhook Url') - - -class Top(RootModel[int]): - root: int = Field( - ..., - description='Number of pixels to expand at the top of the image', - ge=0, - le=2048, - title='Top', - ) - - -class Bottom(RootModel[int]): - root: int = Field( - ..., - description='Number of pixels to expand at the bottom of the image', - ge=0, - le=2048, - title='Bottom', - ) - - -class Left(RootModel[int]): - root: int = Field( - ..., - description='Number of pixels to expand on the left side of the image', - ge=0, - le=2048, - title='Left', - ) - - -class Right(RootModel[int]): - root: int = Field( - ..., - description='Number of pixels to expand on the right side of the image', - ge=0, - le=2048, - title='Right', - ) - - -class CannyLowThreshold(RootModel[int]): - root: int = Field( - ..., - description='Low threshold for Canny edge detection', - ge=0, - le=500, - title='Canny Low Threshold', - ) - - -class CannyHighThreshold(RootModel[int]): - root: int = Field( - ..., - description='High threshold for Canny edge detection', - ge=0, - le=500, - title='Canny High Threshold', - ) - - -class Steps2(RootModel[int]): - root: int = Field( - ..., - description='Number of steps for the image generation process', - ge=15, - le=50, - title='Steps', - ) - - -class Guidance2(RootModel[float]): - root: float = Field( - ..., - description='Guidance strength for the image generation process', - ge=1.0, - le=100.0, - title='Guidance', - ) - - -class BFLOutputFormat(str, Enum): - jpeg = 'jpeg' - png = 'png' - - -class 
BFLValidationError(BaseModel): - loc: List[Union[str, int]] = Field(..., title='Location') - msg: str = Field(..., title='Message') - type: str = Field(..., title='Error Type') - - -class Datum2(BaseModel): - image_id: Optional[str] = Field( - None, description='Unique identifier for the generated image' - ) - url: Optional[str] = Field(None, description='URL to access the generated image') - - -class RecraftImageGenerationResponse(BaseModel): - created: int = Field( - ..., description='Unix timestamp when the generation was created' - ) - credits: int = Field(..., description='Number of credits used for the generation') - data: List[Datum2] = Field(..., description='Array of generated image information') - - -class RecraftImageFeatures(BaseModel): - nsfw_score: Optional[float] = None - - -class RecraftTextLayoutItem(BaseModel): - bbox: List[List[float]] - text: str - - -class RecraftImageColor(BaseModel): - rgb: Optional[List[int]] = None - std: Optional[List[float]] = None - weight: Optional[float] = None - - -class RecraftImageStyle(str, Enum): - digital_illustration = 'digital_illustration' - icon = 'icon' - realistic_image = 'realistic_image' - vector_illustration = 'vector_illustration' - - -class RecraftImageSubStyle(str, Enum): - field_2d_art_poster = '2d_art_poster' - field_3d = '3d' - field_80s = '80s' - glow = 'glow' - grain = 'grain' - hand_drawn = 'hand_drawn' - infantile_sketch = 'infantile_sketch' - kawaii = 'kawaii' - pixel_art = 'pixel_art' - psychedelic = 'psychedelic' - seamless = 'seamless' - voxel = 'voxel' - watercolor = 'watercolor' - broken_line = 'broken_line' - colored_outline = 'colored_outline' - colored_shapes = 'colored_shapes' - colored_shapes_gradient = 'colored_shapes_gradient' - doodle_fill = 'doodle_fill' - doodle_offset_fill = 'doodle_offset_fill' - offset_fill = 'offset_fill' - outline = 'outline' - outline_gradient = 'outline_gradient' - uneven_fill = 'uneven_fill' - field_70s = '70s' - cartoon = 'cartoon' - doodle_line_art = 
'doodle_line_art' - engraving = 'engraving' - flat_2 = 'flat_2' - kawaii_1 = 'kawaii' - line_art = 'line_art' - linocut = 'linocut' - seamless_1 = 'seamless' - b_and_w = 'b_and_w' - enterprise = 'enterprise' - hard_flash = 'hard_flash' - hdr = 'hdr' - motion_blur = 'motion_blur' - natural_light = 'natural_light' - studio_portrait = 'studio_portrait' - line_circuit = 'line_circuit' - field_2d_art_poster_2 = '2d_art_poster_2' - engraving_color = 'engraving_color' - flat_air_art = 'flat_air_art' - hand_drawn_outline = 'hand_drawn_outline' - handmade_3d = 'handmade_3d' - stickers_drawings = 'stickers_drawings' - plastic = 'plastic' - pictogram = 'pictogram' - - -class RecraftTransformModel(str, Enum): - refm1 = 'refm1' - recraft20b = 'recraft20b' - recraftv2 = 'recraftv2' - recraftv3 = 'recraftv3' - flux1_1pro = 'flux1_1pro' - flux1dev = 'flux1dev' - imagen3 = 'imagen3' - hidream_i1_dev = 'hidream_i1_dev' - - -class RecraftImageFormat(str, Enum): - webp = 'webp' - png = 'png' - - -class RecraftResponseFormat(str, Enum): - url = 'url' - b64_json = 'b64_json' - - -class RecraftImage(BaseModel): - b64_json: Optional[str] = None - features: Optional[RecraftImageFeatures] = None - image_id: UUID - revised_prompt: Optional[str] = None - url: Optional[str] = None - - -class RecraftUserControls(BaseModel): - artistic_level: Optional[int] = None - background_color: Optional[RecraftImageColor] = None - colors: Optional[List[RecraftImageColor]] = None - no_text: Optional[bool] = None - - -class RecraftTextLayout(RootModel[List[RecraftTextLayoutItem]]): - root: List[RecraftTextLayoutItem] - - -class RecraftProcessImageRequest(BaseModel): - image: StrictBytes - image_format: Optional[RecraftImageFormat] = None - response_format: Optional[RecraftResponseFormat] = None - - -class RecraftProcessImageResponse(BaseModel): - created: int - credits: int - image: RecraftImage - - -class RecraftImageToImageRequest(BaseModel): - block_nsfw: Optional[bool] = None - calculate_features: 
Optional[bool] = None - controls: Optional[RecraftUserControls] = None - image: StrictBytes - image_format: Optional[RecraftImageFormat] = None - model: Optional[RecraftTransformModel] = None - n: Optional[int] = None - negative_prompt: Optional[str] = None - prompt: str - random_seed: Optional[int] = None - response_format: Optional[RecraftResponseFormat] = None - strength: float - style: Optional[RecraftImageStyle] = None - style_id: Optional[UUID] = None - substyle: Optional[RecraftImageSubStyle] = None - text_layout: Optional[RecraftTextLayout] = None - - -class RecraftGenerateImageResponse(BaseModel): - created: int - credits: int - data: List[RecraftImage] - - -class RecraftTransformImageWithMaskRequest(BaseModel): - block_nsfw: Optional[bool] = None - calculate_features: Optional[bool] = None - image: StrictBytes - image_format: Optional[RecraftImageFormat] = None - mask: StrictBytes - model: Optional[RecraftTransformModel] = None - n: Optional[int] = None - negative_prompt: Optional[str] = None - prompt: str - random_seed: Optional[int] = None - response_format: Optional[RecraftResponseFormat] = None - style: Optional[RecraftImageStyle] = None - style_id: Optional[UUID] = None - substyle: Optional[RecraftImageSubStyle] = None - text_layout: Optional[RecraftTextLayout] = None - - -class KlingErrorResponse(BaseModel): - code: int = Field( - ..., - description='- 1000: Authentication failed\n- 1001: Authorization is empty\n- 1002: Authorization is invalid\n- 1003: Authorization is not yet valid\n- 1004: Authorization has expired\n- 1100: Account exception\n- 1101: Account in arrears (postpaid scenario)\n- 1102: Resource pack depleted or expired (prepaid scenario)\n- 1103: Unauthorized access to requested resource\n- 1200: Invalid request parameters\n- 1201: Invalid parameters\n- 1202: Invalid request method\n- 1203: Requested resource does not exist\n- 1300: Trigger platform strategy\n- 1301: Trigger content security policy\n- 1302: API request too frequent\n- 
1303: Concurrency/QPS exceeds limit\n- 1304: Trigger IP whitelist policy\n- 5000: Internal server error\n- 5001: Service temporarily unavailable\n- 5002: Server internal timeout\n', - ) - message: str = Field(..., description='Human-readable error message') - request_id: str = Field( - ..., description='Request ID for tracking and troubleshooting' - ) - - -class LumaAspectRatio(str, Enum): - field_1_1 = '1:1' - field_16_9 = '16:9' - field_9_16 = '9:16' - field_4_3 = '4:3' - field_3_4 = '3:4' - field_21_9 = '21:9' - field_9_21 = '9:21' - - -class LumaVideoModel(str, Enum): - ray_2 = 'ray-2' - ray_flash_2 = 'ray-flash-2' - ray_1_6 = 'ray-1-6' - - -class LumaVideoModelOutputResolution1(str, Enum): - field_540p = '540p' - field_720p = '720p' - field_1080p = '1080p' - field_4k = '4k' - - -class LumaVideoModelOutputResolution( - RootModel[Union[LumaVideoModelOutputResolution1, str]] -): - root: Union[LumaVideoModelOutputResolution1, str] - - -class LumaVideoModelOutputDuration1(str, Enum): - field_5s = '5s' - field_9s = '9s' - - -class LumaVideoModelOutputDuration( - RootModel[Union[LumaVideoModelOutputDuration1, str]] -): - root: Union[LumaVideoModelOutputDuration1, str] - - -class LumaImageModel(str, Enum): - photon_1 = 'photon-1' - photon_flash_1 = 'photon-flash-1' - - -class LumaImageRef(BaseModel): - url: Optional[AnyUrl] = Field(None, description='The URL of the image reference') - weight: Optional[float] = Field( - None, description='The weight of the image reference' - ) - - -class LumaImageIdentity(BaseModel): - images: Optional[List[AnyUrl]] = Field( - None, description='The URLs of the image identity' - ) - - -class LumaModifyImageRef(BaseModel): - url: Optional[AnyUrl] = Field(None, description='The URL of the image reference') - weight: Optional[float] = Field( - None, description='The weight of the modify image reference' - ) - - -class Type1(str, Enum): - generation = 'generation' - - -class LumaGenerationReference(BaseModel): - type: Literal['generation'] 
- id: UUID = Field(..., description='The ID of the generation') - - -class Type2(str, Enum): - image = 'image' - - -class LumaImageReference(BaseModel): - type: Literal['image'] - url: AnyUrl = Field(..., description='The URL of the image') - - -class LumaKeyframe(RootModel[Union[LumaGenerationReference, LumaImageReference]]): - root: Union[LumaGenerationReference, LumaImageReference] = Field( - ..., - description='A keyframe can be either a Generation reference, an Image, or a Video', - discriminator='type', - ) - - -class LumaGenerationType(str, Enum): - video = 'video' - image = 'image' +class KlingLipSyncMode(str, Enum): + text2video = 'text2video' + audio2video = 'audio2video' -class LumaState(str, Enum): - queued = 'queued' - dreaming = 'dreaming' - completed = 'completed' - failed = 'failed' +class KlingLipSyncVoiceLanguage(str, Enum): + zh = 'zh' + en = 'en' -class LumaAssets(BaseModel): - video: Optional[AnyUrl] = Field(None, description='The URL of the video') - image: Optional[AnyUrl] = Field(None, description='The URL of the image') - progress_video: Optional[AnyUrl] = Field( - None, description='The URL of the progress video' - ) +class ResourcePackType(str, Enum): + decreasing_total = 'decreasing_total' + constant_period = 'constant_period' -class GenerationType(str, Enum): - video = 'video' +class Status4(str, Enum): + toBeOnline = 'toBeOnline' + online = 'online' + expired = 'expired' + runOut = 'runOut' -class GenerationType1(str, Enum): - image = 'image' +class ResourcePackSubscribeInfo(BaseModel): + effective_time: Optional[int] = Field( + None, description='Effective time, Unix timestamp in ms' + ) + invalid_time: Optional[int] = Field( + None, description='Expiration time, Unix timestamp in ms' + ) + purchase_time: Optional[int] = Field( + None, description='Purchase time, Unix timestamp in ms' + ) + remaining_quantity: Optional[float] = Field( + None, description='Remaining quantity (updated with a 12-hour delay)' + ) + resource_pack_id: 
Optional[str] = Field(None, description='Resource package ID') + resource_pack_name: Optional[str] = Field(None, description='Resource package name') + resource_pack_type: Optional[ResourcePackType] = Field( + None, + description='Resource package type (decreasing_total=decreasing total, constant_period=constant periodicity)', + ) + status: Optional[Status4] = Field(None, description='Resource Package Status') + total_quantity: Optional[float] = Field(None, description='Total quantity') -class CharacterRef(BaseModel): - identity0: Optional[LumaImageIdentity] = None +class Data3(BaseModel): + code: Optional[int] = Field(None, description='Error code; 0 indicates success') + msg: Optional[str] = Field(None, description='Error information') + resource_pack_subscribe_infos: Optional[List[ResourcePackSubscribeInfo]] = Field( + None, description='Resource package list' + ) -class LumaImageGenerationRequest(BaseModel): - generation_type: Optional[GenerationType1] = 'image' - model: Optional[LumaImageModel] = 'photon-1' - prompt: Optional[str] = Field(None, description='The prompt of the generation') - aspect_ratio: Optional[LumaAspectRatio] = '16:9' - callback_url: Optional[AnyUrl] = Field( - None, description='The callback URL for the generation' +class KlingResourcePackageResponse(BaseModel): + code: Optional[int] = Field(None, description='Error code; 0 indicates success') + data: Optional[Data3] = None + message: Optional[str] = Field(None, description='Error information') + request_id: Optional[str] = Field( + None, + description='Request ID, generated by the system, used to track requests and troubleshoot problems', ) - image_ref: Optional[List[LumaImageRef]] = None - style_ref: Optional[List[LumaImageRef]] = None - character_ref: Optional[CharacterRef] = None - modify_image_ref: Optional[LumaModifyImageRef] = None -class GenerationType2(str, Enum): - upscale_video = 'upscale_video' +class KlingSingleImageEffectDuration(str, Enum): + field_5 = '5' -class 
LumaUpscaleVideoGenerationRequest(BaseModel): - generation_type: Optional[GenerationType2] = 'upscale_video' - resolution: Optional[LumaVideoModelOutputResolution] = None - callback_url: Optional[AnyUrl] = Field( - None, description='The callback URL for the upscale' - ) +class KlingSingleImageEffectModelName(str, Enum): + kling_v1_6 = 'kling-v1-6' -class GenerationType3(str, Enum): - add_audio = 'add_audio' +class KlingSingleImageEffectsScene(str, Enum): + bloombloom = 'bloombloom' + dizzydizzy = 'dizzydizzy' + fuzzyfuzzy = 'fuzzyfuzzy' + squish = 'squish' + expansion = 'expansion' -class LumaAudioGenerationRequest(BaseModel): - generation_type: Optional[GenerationType3] = 'add_audio' - prompt: Optional[str] = Field(None, description='The prompt of the audio') - negative_prompt: Optional[str] = Field( - None, description='The negative prompt of the audio' - ) - callback_url: Optional[AnyUrl] = Field( - None, description='The callback URL for the audio' - ) +class KlingTaskStatus(str, Enum): + submitted = 'submitted' + processing = 'processing' + succeed = 'succeed' + failed = 'failed' -class LumaError(BaseModel): - detail: Optional[str] = Field(None, description='The error message') +class KlingTextToVideoModelName(str, Enum): + kling_v1 = 'kling-v1' + kling_v1_6 = 'kling-v1-6' -class AspectRatio(str, Enum): +class KlingVideoGenAspectRatio(str, Enum): field_16_9 = '16:9' - field_4_3 = '4:3' - field_1_1 = '1:1' - field_3_4 = '3:4' field_9_16 = '9:16' + field_1_1 = '1:1' -class Duration(int, Enum): - integer_5 = 5 - integer_8 = 8 +class KlingVideoGenCfgScale(RootModel[float]): + root: float = Field( + ..., + description="Flexibility in video generation. 
The higher the value, the lower the model's degree of flexibility, and the stronger the relevance to the user's prompt.", + ge=0.0, + le=1.0, + ) -class Model1(str, Enum): - v3_5 = 'v3.5' +class KlingVideoGenDuration(str, Enum): + field_5 = '5' + field_10 = '10' -class MotionMode(str, Enum): - normal = 'normal' - fast = 'fast' +class KlingVideoGenMode(str, Enum): + std = 'std' + pro = 'pro' -class Quality(str, Enum): - field_360p = '360p' - field_540p = '540p' - field_720p = '720p' - field_1080p = '1080p' +class KlingVideoGenModelName(str, Enum): + kling_v1 = 'kling-v1' + kling_v1_5 = 'kling-v1-5' + kling_v1_6 = 'kling-v1-6' + kling_v2_master = 'kling-v2-master' -class Style(str, Enum): - anime = 'anime' - field_3d_animation = '3d_animation' - clay = 'clay' - comic = 'comic' - cyberpunk = 'cyberpunk' +class KlingVideoResult(BaseModel): + duration: Optional[str] = Field(None, description='Total video duration') + id: Optional[str] = Field(None, description='Generated video ID') + url: Optional[AnyUrl] = Field(None, description='URL for generated video') -class PixverseTextVideoRequest(BaseModel): - aspect_ratio: AspectRatio - duration: Duration - model: Model1 - motion_mode: Optional[MotionMode] = None - negative_prompt: Optional[str] = None - prompt: str - quality: Quality - seed: Optional[int] = None - style: Optional[Style] = None - template_id: Optional[int] = None - water_mark: Optional[bool] = None +class KlingVirtualTryOnModelName(str, Enum): + kolors_virtual_try_on_v1 = 'kolors-virtual-try-on-v1' + kolors_virtual_try_on_v1_5 = 'kolors-virtual-try-on-v1-5' -class Resp(BaseModel): - video_id: Optional[int] = None +class KlingVirtualTryOnRequest(BaseModel): + callback_url: Optional[AnyUrl] = Field( + None, description='The callback notification address' + ) + cloth_image: Optional[str] = Field( + None, + description='Reference clothing image - Base64 encoded string or image URL', + ) + human_image: str = Field( + ..., description='Reference human image - Base64 
encoded string or image URL' + ) + model_name: Optional[KlingVirtualTryOnModelName] = 'kolors-virtual-try-on-v1' -class PixverseVideoResponse(BaseModel): - ErrCode: Optional[int] = None - ErrMsg: Optional[str] = None - Resp_1: Optional[Resp] = Field(None, alias='Resp') +class TaskResult6(BaseModel): + images: Optional[List[KlingImageResult]] = None -class Resp1(BaseModel): - img_id: Optional[int] = None +class Data7(BaseModel): + created_at: Optional[int] = Field(None, description='Task creation time') + task_id: Optional[str] = Field(None, description='Task ID') + task_result: Optional[TaskResult6] = None + task_status: Optional[KlingTaskStatus] = None + task_status_msg: Optional[str] = Field(None, description='Task status information') + updated_at: Optional[int] = Field(None, description='Task update time') -class PixverseImageUploadResponse(BaseModel): - ErrCode: Optional[int] = None - ErrMsg: Optional[str] = None - Resp: Optional[Resp1] = None +class KlingVirtualTryOnResponse(BaseModel): + code: Optional[int] = Field(None, description='Error code') + data: Optional[Data7] = None + message: Optional[str] = Field(None, description='Error message') + request_id: Optional[str] = Field(None, description='Request ID') -class PixverseImageVideoRequest(BaseModel): - img_id: int - model: Model1 - prompt: str - duration: Duration - quality: Quality - motion_mode: Optional[MotionMode] = None - seed: Optional[int] = None - style: Optional[Style] = None - template_id: Optional[int] = None - water_mark: Optional[bool] = None +class LumaAspectRatio(str, Enum): + field_1_1 = '1:1' + field_16_9 = '16:9' + field_9_16 = '9:16' + field_4_3 = '4:3' + field_3_4 = '3:4' + field_21_9 = '21:9' + field_9_21 = '9:21' -class PixverseTransitionVideoRequest(BaseModel): - first_frame_img: int - last_frame_img: int - model: Model1 - duration: Duration - quality: Quality - motion_mode: MotionMode - seed: int - prompt: str - style: Optional[Style] = None - template_id: Optional[int] = None - 
water_mark: Optional[bool] = None +class LumaAssets(BaseModel): + image: Optional[AnyUrl] = Field(None, description='The URL of the image') + progress_video: Optional[AnyUrl] = Field( + None, description='The URL of the progress video' + ) + video: Optional[AnyUrl] = Field(None, description='The URL of the video') -class Status2(int, Enum): - integer_1 = 1 - integer_5 = 5 - integer_6 = 6 - integer_7 = 7 - integer_8 = 8 +class GenerationType(str, Enum): + add_audio = 'add_audio' -class Resp2(BaseModel): - create_time: Optional[str] = None - id: Optional[int] = None - modify_time: Optional[str] = None - negative_prompt: Optional[str] = None - outputHeight: Optional[int] = None - outputWidth: Optional[int] = None - prompt: Optional[str] = None - resolution_ratio: Optional[int] = None - seed: Optional[int] = None - size: Optional[int] = None - status: Optional[Status2] = Field( - None, - description='Video generation status codes:\n* 1 - Generation successful\n* 5 - Generating\n* 6 - Deleted\n* 7 - Contents moderation failed\n* 8 - Generation failed\n', +class LumaAudioGenerationRequest(BaseModel): + callback_url: Optional[AnyUrl] = Field( + None, description='The callback URL for the audio' ) - style: Optional[str] = None - url: Optional[str] = None + generation_type: Optional[GenerationType] = 'add_audio' + negative_prompt: Optional[str] = Field( + None, description='The negative prompt of the audio' + ) + prompt: Optional[str] = Field(None, description='The prompt of the audio') + + +class LumaError(BaseModel): + detail: Optional[str] = Field(None, description='The error message') + + +class Type11(str, Enum): + generation = 'generation' -class PixverseVideoResultResponse(BaseModel): - ErrCode: Optional[int] = None - ErrMsg: Optional[str] = None - Resp: Optional[Resp2] = None +class LumaGenerationReference(BaseModel): + id: UUID = Field(..., description='The ID of the generation') + type: Literal['generation'] -class Image(BaseModel): - bytesBase64Encoded: str - 
gcsUri: Optional[str] = None - mimeType: Optional[str] = None +class GenerationType1(str, Enum): + video = 'video' -class Image1(BaseModel): - bytesBase64Encoded: Optional[str] = None - gcsUri: str - mimeType: Optional[str] = None +class LumaGenerationType(str, Enum): + video = 'video' + image = 'image' -class Instance(BaseModel): - prompt: str = Field(..., description='Text description of the video') - image: Optional[Union[Image, Image1]] = Field( - None, description='Optional image to guide video generation' +class GenerationType2(str, Enum): + image = 'image' + + +class LumaImageIdentity(BaseModel): + images: Optional[List[AnyUrl]] = Field( + None, description='The URLs of the image identity' ) -class PersonGeneration(str, Enum): - ALLOW = 'ALLOW' - BLOCK = 'BLOCK' +class LumaImageModel(str, Enum): + photon_1 = 'photon-1' + photon_flash_1 = 'photon-flash-1' -class Parameters(BaseModel): - aspectRatio: Optional[str] = Field(None, examples=['16:9']) - negativePrompt: Optional[str] = None - personGeneration: Optional[PersonGeneration] = None - sampleCount: Optional[int] = None - seed: Optional[int] = None - storageUri: Optional[str] = Field( - None, description='Optional Cloud Storage URI to upload the video' +class LumaImageRef(BaseModel): + url: Optional[AnyUrl] = Field(None, description='The URL of the image reference') + weight: Optional[float] = Field( + None, description='The weight of the image reference' ) - durationSeconds: Optional[int] = None - enhancePrompt: Optional[bool] = None -class Veo2GenVidRequest(BaseModel): - instances: Optional[List[Instance]] = None - parameters: Optional[Parameters] = None +class Type12(str, Enum): + image = 'image' -class Veo2GenVidResponse(BaseModel): - name: str = Field( - ..., - description='Operation resource name', - examples=[ - 'projects/PROJECT_ID/locations/us-central1/publishers/google/models/MODEL_ID/operations/a1b07c8e-7b5a-4aba-bb34-3e1ccb8afcc8' - ], - ) +class LumaImageReference(BaseModel): + type: 
Literal['image'] + url: AnyUrl = Field(..., description='The URL of the image') -class Veo2GenVidPollRequest(BaseModel): - operationName: str = Field( +class LumaKeyframe(RootModel[Union[LumaGenerationReference, LumaImageReference]]): + root: Union[LumaGenerationReference, LumaImageReference] = Field( ..., - description='Full operation name (from predict response)', - examples=[ - 'projects/PROJECT_ID/locations/us-central1/publishers/google/models/MODEL_ID/operations/OPERATION_ID' - ], + description='A keyframe can be either a Generation reference, an Image, or a Video', + discriminator='type', ) -class Video(BaseModel): - gcsUri: Optional[str] = Field(None, description='Cloud Storage URI of the video') - bytesBase64Encoded: Optional[str] = Field( - None, description='Base64-encoded video content' - ) - mimeType: Optional[str] = Field(None, description='Video MIME type') +class LumaKeyframes(BaseModel): + frame0: Optional[LumaKeyframe] = None + frame1: Optional[LumaKeyframe] = None -class Response(BaseModel): - field_type: Optional[str] = Field( - None, - alias='@type', - examples=[ - 'type.googleapis.com/cloud.ai.large_models.vision.GenerateVideoResponse' - ], - ) - raiMediaFilteredCount: Optional[int] = Field( - None, description='Count of media filtered by responsible AI policies' - ) - raiMediaFilteredReasons: Optional[List[str]] = Field( - None, description='Reasons why media was filtered by responsible AI policies' +class LumaModifyImageRef(BaseModel): + url: Optional[AnyUrl] = Field(None, description='The URL of the image reference') + weight: Optional[float] = Field( + None, description='The weight of the modify image reference' ) - videos: Optional[List[Video]] = None -class Error1(BaseModel): - code: Optional[int] = Field(None, description='Error code') - message: Optional[str] = Field(None, description='Error message') +class LumaState(str, Enum): + queued = 'queued' + dreaming = 'dreaming' + completed = 'completed' + failed = 'failed' -class 
Veo2GenVidPollResponse(BaseModel): - name: Optional[str] = None - done: Optional[bool] = None - response: Optional[Response] = Field( - None, description='The actual prediction response if done is true' - ) - error: Optional[Error1] = Field( - None, description='Error details if operation failed' - ) +class GenerationType3(str, Enum): + upscale_video = 'upscale_video' -class RunwayImageToVideoResponse(BaseModel): - id: Optional[str] = Field(None, description='Task ID') +class LumaVideoModel(str, Enum): + ray_2 = 'ray-2' + ray_flash_2 = 'ray-flash-2' + ray_1_6 = 'ray-1-6' -class RunwayTaskStatusEnum(str, Enum): - SUCCEEDED = 'SUCCEEDED' - RUNNING = 'RUNNING' - FAILED = 'FAILED' - PENDING = 'PENDING' - CANCELLED = 'CANCELLED' - THROTTLED = 'THROTTLED' +class LumaVideoModelOutputDuration1(str, Enum): + field_5s = '5s' + field_9s = '9s' -class RunwayModelEnum(str, Enum): - gen4_turbo = 'gen4_turbo' - gen3a_turbo = 'gen3a_turbo' +class LumaVideoModelOutputDuration( + RootModel[Union[LumaVideoModelOutputDuration1, str]] +): + root: Union[LumaVideoModelOutputDuration1, str] -class Position(str, Enum): - first = 'first' - last = 'last' +class LumaVideoModelOutputResolution1(str, Enum): + field_540p = '540p' + field_720p = '720p' + field_1080p = '1080p' + field_4k = '4k' -class RunwayPromptImageDetailedObject(BaseModel): - uri: str = Field( - ..., description='A HTTPS URL or data URI containing an encoded image.' +class LumaVideoModelOutputResolution( + RootModel[Union[LumaVideoModelOutputResolution1, str]] +): + root: Union[LumaVideoModelOutputResolution1, str] + + +class MinimaxBaseResponse(BaseModel): + status_code: int = Field( + ..., + description='Status code. 0 indicates success, other values indicate errors.', ) - position: Position = Field( + status_msg: str = Field( + ..., description='Specific error details or success message.' 
+ ) + + +class File(BaseModel): + bytes: Optional[int] = Field(None, description='File size in bytes') + created_at: Optional[int] = Field( + None, description='Unix timestamp when the file was created, in seconds' + ) + download_url: Optional[str] = Field( + None, description='The URL to download the video' + ) + file_id: Optional[int] = Field(None, description='Unique identifier for the file') + filename: Optional[str] = Field(None, description='The name of the file') + purpose: Optional[str] = Field(None, description='The purpose of using the file') + + +class MinimaxFileRetrieveResponse(BaseModel): + base_resp: MinimaxBaseResponse + file: File + + +class Status5(str, Enum): + Queueing = 'Queueing' + Preparing = 'Preparing' + Processing = 'Processing' + Success = 'Success' + Fail = 'Fail' + + +class MinimaxTaskResultResponse(BaseModel): + base_resp: MinimaxBaseResponse + file_id: Optional[str] = Field( + None, + description='After the task status changes to Success, this field returns the file ID corresponding to the generated video.', + ) + status: Status5 = Field( ..., - description="The position of the image in the output video. 
'last' is currently supported for gen3a_turbo only.", + description="Task status: 'Queueing' (in queue), 'Preparing' (task is preparing), 'Processing' (generating), 'Success' (task completed successfully), or 'Fail' (task failed).", ) + task_id: str = Field(..., description='The task ID being queried.') -class RunwayDurationEnum(int, Enum): - integer_5 = 5 - integer_10 = 10 +class Model(str, Enum): + T2V_01_Director = 'T2V-01-Director' + I2V_01_Director = 'I2V-01-Director' + S2V_01 = 'S2V-01' + I2V_01 = 'I2V-01' + I2V_01_live = 'I2V-01-live' + T2V_01 = 'T2V-01' -class RunwayAspectRatioEnum(str, Enum): - field_1280_720 = '1280:720' - field_720_1280 = '720:1280' - field_1104_832 = '1104:832' - field_832_1104 = '832:1104' - field_960_960 = '960:960' - field_1584_672 = '1584:672' - field_1280_768 = '1280:768' - field_768_1280 = '768:1280' +class SubjectReferenceItem(BaseModel): + image: Optional[str] = Field( + None, description='URL or base64 encoding of the subject reference image.' + ) + mask: Optional[str] = Field( + None, + description='URL or base64 encoding of the mask for the subject reference image.', + ) -class RunwayPromptImageObject( - RootModel[Union[str, List[RunwayPromptImageDetailedObject]]] -): - root: Union[str, List[RunwayPromptImageDetailedObject]] = Field( +class MinimaxVideoGenerationRequest(BaseModel): + callback_url: Optional[str] = Field( + None, + description='Optional. URL to receive real-time status updates about the video generation task.', + ) + first_frame_image: Optional[str] = Field( + None, + description='URL or base64 encoding of the first frame image. Required when model is I2V-01, I2V-01-Director, or I2V-01-live.', + ) + model: Model = Field( ..., - description='Image(s) to use for the video generation. Can be a single URI or an array of image objects with positions.', + description='Required. ID of model. 
Options: T2V-01-Director, I2V-01-Director, S2V-01, I2V-01, I2V-01-live, T2V-01', + ) + prompt: Optional[str] = Field( + None, + description='Description of the video. Should be less than 2000 characters. Supports camera movement instructions in [brackets].', + max_length=2000, + ) + prompt_optimizer: Optional[bool] = Field( + True, + description='If true (default), the model will automatically optimize the prompt. Set to false for more precise control.', + ) + subject_reference: Optional[List[SubjectReferenceItem]] = Field( + None, + description='Only available when model is S2V-01. The model will generate a video based on the subject uploaded through this parameter.', ) -class Datum3(BaseModel): - b64_json: Optional[str] = Field(None, description='Base64 encoded image data') - url: Optional[str] = Field(None, description='URL of the image') - revised_prompt: Optional[str] = Field(None, description='Revised prompt') - - -class InputTokensDetails(BaseModel): - text_tokens: Optional[int] = None - image_tokens: Optional[int] = None +class MinimaxVideoGenerationResponse(BaseModel): + base_resp: MinimaxBaseResponse + task_id: str = Field( + ..., description='The task ID for the asynchronous video generation task.' 
+ ) -class Usage(BaseModel): - input_tokens: Optional[int] = None - input_tokens_details: Optional[InputTokensDetails] = None - output_tokens: Optional[int] = None - total_tokens: Optional[int] = None +class Truncation(str, Enum): + disabled = 'disabled' + auto = 'auto' -class OpenAIImageGenerationResponse(BaseModel): - data: Optional[List[Datum3]] = None - usage: Optional[Usage] = None +class ModelResponseProperties(BaseModel): + instructions: Optional[str] = Field( + None, description='Instructions for the model on how to generate the response' + ) + max_output_tokens: Optional[int] = Field( + None, description='Maximum number of tokens to generate' + ) + model: Optional[str] = Field( + None, description='The model used to generate the response' + ) + temperature: Optional[float] = Field( + 1, description='Controls randomness in the response', ge=0.0, le=2.0 + ) + top_p: Optional[float] = Field( + 1, + description='Controls diversity of the response via nucleus sampling', + ge=0.0, + le=1.0, + ) + truncation: Optional[Truncation] = Field( + 'disabled', description='How to handle truncation of the response' + ) -class Quality3(str, Enum): +class Moderation(str, Enum): low = 'low' - medium = 'medium' - high = 'high' - standard = 'standard' - hd = 'hd' + auto = 'auto' class OutputFormat1(str, Enum): @@ -2005,9 +1366,41 @@ class OutputFormat1(str, Enum): jpeg = 'jpeg' -class Moderation(str, Enum): - low = 'low' - auto = 'auto' +class OpenAIImageEditRequest(BaseModel): + background: Optional[str] = Field( + None, description='Background transparency', examples=['opaque'] + ) + model: str = Field( + ..., description='The model to use for image editing', examples=['gpt-image-1'] + ) + moderation: Optional[Moderation] = Field( + None, description='Content moderation setting', examples=['auto'] + ) + n: Optional[int] = Field( + None, description='The number of images to generate', examples=[1] + ) + output_compression: Optional[int] = Field( + None, 
description='Compression level for JPEG or WebP (0-100)', examples=[100] + ) + output_format: Optional[OutputFormat1] = Field( + None, description='Format of the output image', examples=['png'] + ) + prompt: str = Field( + ..., + description='A text description of the desired edit', + examples=['Give the rocketship rainbow coloring'], + ) + quality: Optional[str] = Field( + None, description='The quality of the edited image', examples=['low'] + ) + size: Optional[str] = Field( + None, description='Size of the output image', examples=['1024x1024'] + ) + user: Optional[str] = Field( + None, + description='A unique identifier for end-user monitoring', + examples=['user-1234'], + ) class Background(str, Enum): @@ -2015,54 +1408,62 @@ class Background(str, Enum): opaque = 'opaque' +class Quality(str, Enum): + low = 'low' + medium = 'medium' + high = 'high' + standard = 'standard' + hd = 'hd' + + class ResponseFormat(str, Enum): url = 'url' b64_json = 'b64_json' -class Style3(str, Enum): +class Style(str, Enum): vivid = 'vivid' natural = 'natural' class OpenAIImageGenerationRequest(BaseModel): + background: Optional[Background] = Field( + None, description='Background transparency', examples=['opaque'] + ) model: Optional[str] = Field( None, description='The model to use for image generation', examples=['dall-e-3'] ) - prompt: str = Field( - ..., - description='A text description of the desired image', - examples=['Draw a rocket in front of a blackhole in deep space'], + moderation: Optional[Moderation] = Field( + None, description='Content moderation setting', examples=['auto'] ) n: Optional[int] = Field( None, description='The number of images to generate (1-10). 
Only 1 supported for dall-e-3.', examples=[1], ) - quality: Optional[Quality3] = Field( - None, description='The quality of the generated image', examples=['high'] - ) - size: Optional[str] = Field( - None, - description='Size of the image (e.g., 1024x1024, 1536x1024, auto)', - examples=['1024x1536'], + output_compression: Optional[int] = Field( + None, description='Compression level for JPEG or WebP (0-100)', examples=[100] ) output_format: Optional[OutputFormat1] = Field( None, description='Format of the output image', examples=['png'] ) - output_compression: Optional[int] = Field( - None, description='Compression level for JPEG or WebP (0-100)', examples=[100] - ) - moderation: Optional[Moderation] = Field( - None, description='Content moderation setting', examples=['auto'] + prompt: str = Field( + ..., + description='A text description of the desired image', + examples=['Draw a rocket in front of a blackhole in deep space'], ) - background: Optional[Background] = Field( - None, description='Background transparency', examples=['opaque'] + quality: Optional[Quality] = Field( + None, description='The quality of the generated image', examples=['high'] ) response_format: Optional[ResponseFormat] = Field( None, description='Response format of image data', examples=['b64_json'] ) - style: Optional[Style3] = Field( + size: Optional[str] = Field( + None, + description='Size of the image (e.g., 1024x1024, 1536x1024, auto)', + examples=['1024x1536'], + ) + style: Optional[Style] = Field( None, description='Style of the image (only for dall-e-3)', examples=['vivid'] ) user: Optional[str] = Field( @@ -2072,103 +1473,176 @@ class OpenAIImageGenerationRequest(BaseModel): ) -class OpenAIImageEditRequest(BaseModel): - model: str = Field( - ..., description='The model to use for image editing', examples=['gpt-image-1'] - ) - prompt: str = Field( - ..., - description='A text description of the desired edit', - examples=['Give the rocketship rainbow coloring'], - ) - n: 
Optional[int] = Field( - None, description='The number of images to generate', examples=[1] - ) - quality: Optional[str] = Field( - None, description='The quality of the edited image', examples=['low'] - ) - size: Optional[str] = Field( - None, description='Size of the output image', examples=['1024x1024'] - ) - output_format: Optional[OutputFormat1] = Field( - None, description='Format of the output image', examples=['png'] - ) - output_compression: Optional[int] = Field( - None, description='Compression level for JPEG or WebP (0-100)', examples=[100] - ) - moderation: Optional[Moderation] = Field( - None, description='Content moderation setting', examples=['auto'] - ) - background: Optional[str] = Field( - None, description='Background transparency', examples=['opaque'] - ) - user: Optional[str] = Field( - None, - description='A unique identifier for end-user monitoring', - examples=['user-1234'], - ) +class Datum2(BaseModel): + b64_json: Optional[str] = Field(None, description='Base64 encoded image data') + revised_prompt: Optional[str] = Field(None, description='Revised prompt') + url: Optional[str] = Field(None, description='URL of the image') -class CustomerStorageResourceResponse(BaseModel): - download_url: Optional[str] = Field( - None, - description='The signed URL to use for downloading the file from the specified path', - ) - upload_url: Optional[str] = Field( - None, - description='The signed URL to use for uploading the file to the specified path', - ) - expires_at: Optional[datetime] = Field( - None, description='When the signed URL will expire' +class InputTokensDetails(BaseModel): + image_tokens: Optional[int] = None + text_tokens: Optional[int] = None + + +class Usage(BaseModel): + input_tokens: Optional[int] = None + input_tokens_details: Optional[InputTokensDetails] = None + output_tokens: Optional[int] = None + total_tokens: Optional[int] = None + + +class OpenAIImageGenerationResponse(BaseModel): + data: Optional[List[Datum2]] = None + usage: 
Optional[Usage] = None + + +class OpenAIModels(str, Enum): + gpt_4 = 'gpt-4' + gpt_4_0314 = 'gpt-4-0314' + gpt_4_0613 = 'gpt-4-0613' + gpt_4_32k = 'gpt-4-32k' + gpt_4_32k_0314 = 'gpt-4-32k-0314' + gpt_4_32k_0613 = 'gpt-4-32k-0613' + gpt_4_0125_preview = 'gpt-4-0125-preview' + gpt_4_turbo = 'gpt-4-turbo' + gpt_4_turbo_2024_04_09 = 'gpt-4-turbo-2024-04-09' + gpt_4_turbo_preview = 'gpt-4-turbo-preview' + gpt_4_1106_preview = 'gpt-4-1106-preview' + gpt_4_vision_preview = 'gpt-4-vision-preview' + gpt_3_5_turbo = 'gpt-3.5-turbo' + gpt_3_5_turbo_16k = 'gpt-3.5-turbo-16k' + gpt_3_5_turbo_0301 = 'gpt-3.5-turbo-0301' + gpt_3_5_turbo_0613 = 'gpt-3.5-turbo-0613' + gpt_3_5_turbo_1106 = 'gpt-3.5-turbo-1106' + gpt_3_5_turbo_0125 = 'gpt-3.5-turbo-0125' + gpt_3_5_turbo_16k_0613 = 'gpt-3.5-turbo-16k-0613' + gpt_4_1 = 'gpt-4.1' + gpt_4_1_mini = 'gpt-4.1-mini' + gpt_4_1_nano = 'gpt-4.1-nano' + gpt_4_1_2025_04_14 = 'gpt-4.1-2025-04-14' + gpt_4_1_mini_2025_04_14 = 'gpt-4.1-mini-2025-04-14' + gpt_4_1_nano_2025_04_14 = 'gpt-4.1-nano-2025-04-14' + o1 = 'o1' + o1_mini = 'o1-mini' + o1_preview = 'o1-preview' + o1_pro = 'o1-pro' + o1_2024_12_17 = 'o1-2024-12-17' + o1_preview_2024_09_12 = 'o1-preview-2024-09-12' + o1_mini_2024_09_12 = 'o1-mini-2024-09-12' + o1_pro_2025_03_19 = 'o1-pro-2025-03-19' + o3 = 'o3' + o3_mini = 'o3-mini' + o3_2025_04_16 = 'o3-2025-04-16' + o3_mini_2025_01_31 = 'o3-mini-2025-01-31' + o4_mini = 'o4-mini' + o4_mini_2025_04_16 = 'o4-mini-2025-04-16' + gpt_4o = 'gpt-4o' + gpt_4o_mini = 'gpt-4o-mini' + gpt_4o_2024_11_20 = 'gpt-4o-2024-11-20' + gpt_4o_2024_08_06 = 'gpt-4o-2024-08-06' + gpt_4o_2024_05_13 = 'gpt-4o-2024-05-13' + gpt_4o_mini_2024_07_18 = 'gpt-4o-mini-2024-07-18' + gpt_4o_audio_preview = 'gpt-4o-audio-preview' + gpt_4o_audio_preview_2024_10_01 = 'gpt-4o-audio-preview-2024-10-01' + gpt_4o_audio_preview_2024_12_17 = 'gpt-4o-audio-preview-2024-12-17' + gpt_4o_mini_audio_preview = 'gpt-4o-mini-audio-preview' + gpt_4o_mini_audio_preview_2024_12_17 = 
'gpt-4o-mini-audio-preview-2024-12-17' + gpt_4o_search_preview = 'gpt-4o-search-preview' + gpt_4o_mini_search_preview = 'gpt-4o-mini-search-preview' + gpt_4o_search_preview_2025_03_11 = 'gpt-4o-search-preview-2025-03-11' + gpt_4o_mini_search_preview_2025_03_11 = 'gpt-4o-mini-search-preview-2025-03-11' + computer_use_preview = 'computer-use-preview' + computer_use_preview_2025_03_11 = 'computer-use-preview-2025-03-11' + chatgpt_4o_latest = 'chatgpt-4o-latest' + + +class Reason(str, Enum): + max_output_tokens = 'max_output_tokens' + content_filter = 'content_filter' + + +class IncompleteDetails(BaseModel): + reason: Optional[Reason] = Field( + None, description='The reason why the response is incomplete.' ) - existing_file: Optional[bool] = Field( - None, description='Whether an existing file with the same hash was found' + + +class Object(str, Enum): + response = 'response' + + +class Status6(str, Enum): + completed = 'completed' + failed = 'failed' + in_progress = 'in_progress' + incomplete = 'incomplete' + + +class Type13(str, Enum): + output_audio = 'output_audio' + + +class OutputAudioContent(BaseModel): + data: str = Field(..., description='Base64-encoded audio data') + transcript: str = Field(..., description='Transcript of the audio') + type: Type13 = Field(..., description='The type of output content') + + +class Role4(str, Enum): + assistant = 'assistant' + + +class Type14(str, Enum): + message = 'message' + + +class Type15(str, Enum): + output_text = 'output_text' + + +class OutputTextContent(BaseModel): + text: str = Field(..., description='The text content') + type: Type15 = Field(..., description='The type of output content') + + +class AspectRatio1(RootModel[float]): + root: float = Field( + ..., + description='Aspect ratio (width / height)', + ge=0.4, + le=2.5, + title='Aspectratio', ) -class Pikaffect(str, Enum): - Cake_ify = 'Cake-ify' - Crumble = 'Crumble' - Crush = 'Crush' - Decapitate = 'Decapitate' - Deflate = 'Deflate' - Dissolve = 'Dissolve' - 
Explode = 'Explode' - Eye_pop = 'Eye-pop' - Inflate = 'Inflate' - Levitate = 'Levitate' - Melt = 'Melt' - Peel = 'Peel' - Poke = 'Poke' - Squish = 'Squish' - Ta_da = 'Ta-da' - Tear = 'Tear' +class IngredientsMode(str, Enum): + creative = 'creative' + precise = 'precise' -class PikaBodyGeneratePikaffectsGeneratePikaffectsPost(BaseModel): - image: Optional[StrictBytes] = Field(None, title='Image') - pikaffect: Optional[Pikaffect] = Field(None, title='Pikaffect') - promptText: Optional[str] = Field(None, title='Prompttext') +class PikaBodyGenerate22C2vGenerate22PikascenesPost(BaseModel): + aspectRatio: Optional[AspectRatio1] = Field( + None, description='Aspect ratio (width / height)', title='Aspectratio' + ) + duration: Optional[int] = Field(5, title='Duration') + images: Optional[List[StrictBytes]] = Field(None, title='Images') + ingredientsMode: IngredientsMode = Field(..., title='Ingredientsmode') negativePrompt: Optional[str] = Field(None, title='Negativeprompt') + promptText: Optional[str] = Field(None, title='Prompttext') + resolution: Optional[str] = Field('1080p', title='Resolution') seed: Optional[int] = Field(None, title='Seed') -class PikaGenerateResponse(BaseModel): - video_id: str = Field(..., title='Video Id') - - class PikaBodyGeneratePikadditionsGeneratePikadditionsPost(BaseModel): - video: Optional[StrictBytes] = Field(None, title='Video') image: Optional[StrictBytes] = Field(None, title='Image') - promptText: Optional[str] = Field(None, title='Prompttext') negativePrompt: Optional[str] = Field(None, title='Negativeprompt') + promptText: Optional[str] = Field(None, title='Prompttext') seed: Optional[int] = Field(None, title='Seed') + video: Optional[StrictBytes] = Field(None, title='Video') class PikaBodyGeneratePikaswapsGeneratePikaswapsPost(BaseModel): - video: Optional[StrictBytes] = Field(None, title='Video') image: Optional[StrictBytes] = Field(None, title='Image') - promptText: Optional[str] = Field(None, title='Prompttext') modifyRegionMask: 
Optional[StrictBytes] = Field( None, description='A mask image that specifies the region to modify, where the mask is white and the background is black', @@ -2180,35 +1654,23 @@ class PikaBodyGeneratePikaswapsGeneratePikaswapsPost(BaseModel): title='Modifyregionroi', ) negativePrompt: Optional[str] = Field(None, title='Negativeprompt') + promptText: Optional[str] = Field(None, title='Prompttext') seed: Optional[int] = Field(None, title='Seed') + video: Optional[StrictBytes] = Field(None, title='Video') -class IngredientsMode(str, Enum): - creative = 'creative' - precise = 'precise' +class PikaDurationEnum(int, Enum): + integer_5 = 5 + integer_10 = 10 -class AspectRatio1(RootModel[float]): - root: float = Field( - ..., - description='Aspect ratio (width / height)', - ge=0.4, - le=2.5, - title='Aspectratio', - ) +class PikaGenerateResponse(BaseModel): + video_id: str = Field(..., title='Video Id') -class PikaBodyGenerate22C2vGenerate22PikascenesPost(BaseModel): - images: Optional[List[StrictBytes]] = Field(None, title='Images') - ingredientsMode: IngredientsMode = Field(..., title='Ingredientsmode') - promptText: Optional[str] = Field(None, title='Prompttext') - negativePrompt: Optional[str] = Field(None, title='Negativeprompt') - seed: Optional[int] = Field(None, title='Seed') - resolution: Optional[str] = Field('1080p', title='Resolution') - duration: Optional[int] = Field(5, title='Duration') - aspectRatio: Optional[AspectRatio1] = Field( - None, description='Aspect ratio (width / height)', title='Aspectratio' - ) +class PikaResolutionEnum(str, Enum): + field_1080p = '1080p' + field_720p = '720p' class PikaStatusEnum(str, Enum): @@ -2223,14 +1685,161 @@ class PikaValidationError(BaseModel): type: str = Field(..., title='Error Type') -class PikaResolutionEnum(str, Enum): - field_1080p = '1080p' +class PikaVideoResponse(BaseModel): + id: str = Field(..., title='Id') + progress: Optional[int] = Field(None, title='Progress') + status: PikaStatusEnum + url: 
Optional[str] = Field(None, title='Url') + + +class Pikaffect(str, Enum): + Cake_ify = 'Cake-ify' + Crumble = 'Crumble' + Crush = 'Crush' + Decapitate = 'Decapitate' + Deflate = 'Deflate' + Dissolve = 'Dissolve' + Explode = 'Explode' + Eye_pop = 'Eye-pop' + Inflate = 'Inflate' + Levitate = 'Levitate' + Melt = 'Melt' + Peel = 'Peel' + Poke = 'Poke' + Squish = 'Squish' + Ta_da = 'Ta-da' + Tear = 'Tear' + + +class Resp(BaseModel): + img_id: Optional[int] = None + + +class PixverseImageUploadResponse(BaseModel): + ErrCode: Optional[int] = None + ErrMsg: Optional[str] = None + Resp_1: Optional[Resp] = Field(None, alias='Resp') + + +class Duration(int, Enum): + integer_5 = 5 + integer_8 = 8 + + +class Model1(str, Enum): + v3_5 = 'v3.5' + + +class MotionMode(str, Enum): + normal = 'normal' + fast = 'fast' + + +class Quality1(str, Enum): + field_360p = '360p' + field_540p = '540p' field_720p = '720p' + field_1080p = '1080p' -class PikaDurationEnum(int, Enum): +class Style1(str, Enum): + anime = 'anime' + field_3d_animation = '3d_animation' + clay = 'clay' + comic = 'comic' + cyberpunk = 'cyberpunk' + + +class PixverseImageVideoRequest(BaseModel): + duration: Duration + img_id: int + model: Model1 + motion_mode: Optional[MotionMode] = None + prompt: str + quality: Quality1 + seed: Optional[int] = None + style: Optional[Style1] = None + template_id: Optional[int] = None + water_mark: Optional[bool] = None + + +class AspectRatio2(str, Enum): + field_16_9 = '16:9' + field_4_3 = '4:3' + field_1_1 = '1:1' + field_3_4 = '3:4' + field_9_16 = '9:16' + + +class PixverseTextVideoRequest(BaseModel): + aspect_ratio: AspectRatio2 + duration: Duration + model: Model1 + motion_mode: Optional[MotionMode] = None + negative_prompt: Optional[str] = None + prompt: str + quality: Quality1 + seed: Optional[int] = None + style: Optional[Style1] = None + template_id: Optional[int] = None + water_mark: Optional[bool] = None + + +class PixverseTransitionVideoRequest(BaseModel): + duration: Duration 
+ first_frame_img: int + last_frame_img: int + model: Model1 + motion_mode: MotionMode + prompt: str + quality: Quality1 + seed: int + style: Optional[Style1] = None + template_id: Optional[int] = None + water_mark: Optional[bool] = None + + +class Resp1(BaseModel): + video_id: Optional[int] = None + + +class PixverseVideoResponse(BaseModel): + ErrCode: Optional[int] = None + ErrMsg: Optional[str] = None + Resp: Optional[Resp1] = None + + +class Status7(int, Enum): + integer_1 = 1 integer_5 = 5 - integer_10 = 10 + integer_6 = 6 + integer_7 = 7 + integer_8 = 8 + + +class Resp2(BaseModel): + create_time: Optional[str] = None + id: Optional[int] = None + modify_time: Optional[str] = None + negative_prompt: Optional[str] = None + outputHeight: Optional[int] = None + outputWidth: Optional[int] = None + prompt: Optional[str] = None + resolution_ratio: Optional[int] = None + seed: Optional[int] = None + size: Optional[int] = None + status: Optional[Status7] = Field( + None, + description='Video generation status codes:\n* 1 - Generation successful\n* 5 - Generating\n* 6 - Deleted\n* 7 - Contents moderation failed\n* 8 - Generation failed\n', + ) + style: Optional[str] = None + url: Optional[str] = None + + +class PixverseVideoResultResponse(BaseModel): + ErrCode: Optional[int] = None + ErrMsg: Optional[str] = None + Resp: Optional[Resp2] = None class RgbItem(RootModel[int]): @@ -2241,54 +1850,113 @@ class RGBColor(BaseModel): rgb: List[RgbItem] = Field(..., max_length=3, min_length=3) -class StabilityStabilityClientID(RootModel[str]): - root: str = Field( - ..., - description='The name of your application, used to help us communicate app-specific debugging or moderation issues to you.', - examples=['my-awesome-app'], - max_length=256, - ) +class GenerateSummary(str, Enum): + auto = 'auto' + concise = 'concise' + detailed = 'detailed' -class StabilityStabilityClientUserID(RootModel[str]): - root: str = Field( - ..., - description='A unique identifier for your end user. 
Used to help us communicate user-specific debugging or moderation issues to you. Feel free to obfuscate this value to protect user privacy.', - examples=['DiscordUser#9999'], - max_length=256, - ) +class Summary(str, Enum): + auto = 'auto' + concise = 'concise' + detailed = 'detailed' + + +class ReasoningEffort(str, Enum): + low = 'low' + medium = 'medium' + high = 'high' -class StabilityStabilityClientVersion(RootModel[str]): - root: str = Field( +class Status8(str, Enum): + in_progress = 'in_progress' + completed = 'completed' + incomplete = 'incomplete' + + +class Type16(str, Enum): + summary_text = 'summary_text' + + +class SummaryItem(BaseModel): + text: str = Field( ..., - description='The version of your application, used to help us communicate version-specific debugging or moderation issues to you.', - examples=['1.2.1'], - max_length=256, + description='A short summary of the reasoning used by the model when generating\nthe response.\n', + ) + type: Type16 = Field( + ..., description='The type of the object. Always `summary_text`.\n' ) -class Name(str, Enum): - content_moderation = 'content_moderation' +class Type17(str, Enum): + reasoning = 'reasoning' -class StabilityContentModerationResponse(BaseModel): +class ReasoningItem(BaseModel): id: str = Field( - ..., - description='A unique identifier associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new) you file, as it will greatly assist us in diagnosing the root cause of the problem.', - examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], - min_length=1, + ..., description='The unique identifier of the reasoning content.\n' ) - name: Name = Field( - ..., - description='Our content moderation system has flagged some part of your request and subsequently denied it. You were not charged for this request. While this may at times be frustrating, it is necessary to maintain the integrity of our platform and ensure a safe experience for all users. 
If you would like to provide feedback, please use the [Support Form](https://kb.stability.ai/knowledge-base/kb-tickets/new).', + status: Optional[Status8] = Field( + None, + description='The status of the item. One of `in_progress`, `completed`, or\n`incomplete`. Populated when items are returned via API.\n', ) - errors: List[str] = Field( - ..., - description='One or more error messages indicating what went wrong.', - examples=[['some-field: is required']], - min_length=1, + summary: List[SummaryItem] = Field(..., description='Reasoning text contents.\n') + type: Type17 = Field( + ..., description='The type of the object. Always `reasoning`.\n' + ) + + +class Controls(BaseModel): + artistic_level: Optional[int] = Field( + None, + description='Defines artistic tone of your image. At a simple level, the person looks straight at the camera in a static and clean style. Dynamic and eccentric levels introduce movement and creativity.', + ge=0, + le=5, + ) + background_color: Optional[RGBColor] = None + colors: Optional[List[RGBColor]] = Field( + None, description='An array of preferable colors' + ) + no_text: Optional[bool] = Field(None, description='Do not embed text layouts') + + +class RecraftImageGenerationRequest(BaseModel): + controls: Optional[Controls] = Field( + None, description='The controls for the generated image' + ) + model: str = Field( + ..., description='The model to use for generation (e.g., "recraftv3")' + ) + n: int = Field(..., description='The number of images to generate', ge=1, le=4) + prompt: str = Field( + ..., description='The text prompt describing the image to generate' + ) + size: str = Field( + ..., description='The size of the generated image (e.g., "1024x1024")' + ) + style: Optional[str] = Field( + None, + description='The style to apply to the generated image (e.g., "digital_illustration")', + ) + style_id: Optional[str] = Field( + None, + description='The style ID to apply to the generated image (e.g., 
"123e4567-e89b-12d3-a456-426614174000"). If style_id is provided, style should not be provided.', + ) + + +class Datum3(BaseModel): + image_id: Optional[str] = Field( + None, description='Unique identifier for the generated image' + ) + url: Optional[str] = Field(None, description='URL to access the generated image') + + +class RecraftImageGenerationResponse(BaseModel): + created: int = Field( + ..., description='Unix timestamp when the generation was created' ) + credits: int = Field(..., description='Number of credits used for the generation') + data: List[Datum3] = Field(..., description='Array of generated image information') class RenderingSpeed(str, Enum): @@ -2297,403 +1965,249 @@ class RenderingSpeed(str, Enum): QUALITY = 'QUALITY' -class StabilityCreativity(RootModel[float]): - root: float = Field( - ..., - description='Controls the likelihood of creating additional details not heavily conditioned by the init image.', - ge=0.2, - le=0.5, - ) +class ResponseErrorCode(str, Enum): + server_error = 'server_error' + rate_limit_exceeded = 'rate_limit_exceeded' + invalid_prompt = 'invalid_prompt' + vector_store_timeout = 'vector_store_timeout' + invalid_image = 'invalid_image' + invalid_image_format = 'invalid_image_format' + invalid_base64_image = 'invalid_base64_image' + invalid_image_url = 'invalid_image_url' + image_too_large = 'image_too_large' + image_too_small = 'image_too_small' + image_parse_error = 'image_parse_error' + image_content_policy_violation = 'image_content_policy_violation' + invalid_image_mode = 'invalid_image_mode' + image_file_too_large = 'image_file_too_large' + unsupported_image_media_type = 'unsupported_image_media_type' + empty_image_file = 'empty_image_file' + failed_to_download_image = 'failed_to_download_image' + image_file_not_found = 'image_file_not_found' + +class Type18(str, Enum): + json_object = 'json_object' -class StabilityGenerationID(RootModel[str]): - root: str = Field( + +class ResponseFormatJsonObject(BaseModel): + 
type: Type18 = Field( ..., - description='The `id` of a generation, typically used for async generations, that can be used to check the status of the generation or retrieve the result.', - examples=['a6dc6c6e20acda010fe14d71f180658f2896ed9b4ec25aa99a6ff06c796987c4'], - max_length=64, - min_length=64, + description='The type of response format being defined. Always `json_object`.', ) -class Mode(str, Enum): - text_to_image = 'text-to-image' - image_to_image = 'image-to-image' +class ResponseFormatJsonSchemaSchema(BaseModel): + pass + model_config = ConfigDict( + extra='allow', + ) -class AspectRatio2(str, Enum): - field_21_9 = '21:9' - field_16_9 = '16:9' - field_3_2 = '3:2' - field_5_4 = '5:4' - field_1_1 = '1:1' - field_4_5 = '4:5' - field_2_3 = '2:3' - field_9_16 = '9:16' - field_9_21 = '9:21' +class Type19(str, Enum): + text = 'text' -class Model4(str, Enum): - sd3_5_large = 'sd3.5-large' - sd3_5_large_turbo = 'sd3.5-large-turbo' - sd3_5_medium = 'sd3.5-medium' +class ResponseFormatText(BaseModel): + type: Type19 = Field( + ..., description='The type of response format being defined. Always `text`.' + ) -class OutputFormat3(str, Enum): - png = 'png' - jpeg = 'jpeg' +class Truncation1(str, Enum): + auto = 'auto' + disabled = 'disabled' -class StylePreset(str, Enum): - enhance = 'enhance' - anime = 'anime' - photographic = 'photographic' - digital_art = 'digital-art' - comic_book = 'comic-book' - fantasy_art = 'fantasy-art' - line_art = 'line-art' - analog_film = 'analog-film' - neon_punk = 'neon-punk' - isometric = 'isometric' - low_poly = 'low-poly' - origami = 'origami' - modeling_compound = 'modeling-compound' - cinematic = 'cinematic' - field_3d_model = '3d-model' - pixel_art = 'pixel-art' - tile_texture = 'tile-texture' - - -class StabilityImageGenrationSD3Request(BaseModel): - prompt: str = Field( +class InputTokensDetails1(BaseModel): + cached_tokens: int = Field( ..., - description='What you wish to see in the output image. 
A strong, descriptive prompt that clearly defines\nelements, colors, and subjects will lead to better results.', - max_length=10000, - min_length=1, - ) - mode: Optional[Mode] = Field( - 'text-to-image', - description='Controls whether this is a text-to-image or image-to-image generation, which affects which parameters are required:\n- **text-to-image** requires only the `prompt` parameter\n- **image-to-image** requires the `prompt`, `image`, and `strength` parameters', - title='GenerationMode', - ) - image: Optional[StrictBytes] = Field( - None, - description='The image to use as the starting point for the generation.\n\nSupported formats:\n\n\n\n - jpeg\n - png\n - webp\n\nSupported dimensions:\n\n\n\n - Every side must be at least 64 pixels\n\n> **Important:** This parameter is only valid for **image-to-image** requests.', - ) - strength: Optional[float] = Field( - None, - description='Sometimes referred to as _denoising_, this parameter controls how much influence the\n`image` parameter has on the generated image. A value of 0 would yield an image that\nis identical to the input. A value of 1 would be as if you passed in no image at all.\n\n> **Important:** This parameter is only valid for **image-to-image** requests.', - ge=0.0, - le=1.0, - ) - aspect_ratio: Optional[AspectRatio2] = Field( - '1:1', - description='Controls the aspect ratio of the generated image. 
Defaults to 1:1.\n\n> **Important:** This parameter is only valid for **text-to-image** requests.', - ) - model: Optional[Model4] = Field( - 'sd3.5-large', - description='The model to use for generation.\n\n- `sd3.5-large` requires 6.5 credits per generation\n- `sd3.5-large-turbo` requires 4 credits per generation\n- `sd3.5-medium` requires 3.5 credits per generation\n- As of the April 17, 2025, `sd3-large`, `sd3-large-turbo` and `sd3-medium`\n\n\n\n are re-routed to their `sd3.5-[model version]` equivalent, at the same price.', - ) - seed: Optional[float] = Field( - 0, - description="A specific value that is used to guide the 'randomness' of the generation. (Omit this parameter or pass `0` to use a random seed.)", - ge=0.0, - le=4294967294.0, - ) - output_format: Optional[OutputFormat3] = Field( - 'png', description='Dictates the `content-type` of the generated image.' - ) - style_preset: Optional[StylePreset] = Field( - None, description='Guides the image model towards a particular style.' - ) - negative_prompt: Optional[str] = Field( - None, - description='Keywords of what you **do not** wish to see in the output image.\nThis is an advanced feature.', - max_length=10000, - ) - cfg_scale: Optional[float] = Field( - None, - description='How strictly the diffusion process adheres to the prompt text (higher values keep your image closer to your prompt). The _Large_ and _Medium_ models use a default of `4`. The _Turbo_ model uses a default of `1`.', - ge=1.0, - le=10.0, + description='The number of tokens that were retrieved from the cache. 
\n[More on prompt caching](/docs/guides/prompt-caching).\n', ) -class FinishReason(str, Enum): - SUCCESS = 'SUCCESS' - CONTENT_FILTERED = 'CONTENT_FILTERED' +class OutputTokensDetails(BaseModel): + reasoning_tokens: int = Field(..., description='The number of reasoning tokens.') -class StabilityImageGenrationSD3Response200(BaseModel): - image: str = Field( - ..., - description='The generated image, encoded to base64.', - examples=['AAAAIGZ0eXBpc29tAAACAGlzb21pc28yYXZjMW1...'], - ) - seed: Optional[float] = Field( - 0, - description='The seed used as random noise for this generation.', - examples=[343940597], - ge=0.0, - le=4294967294.0, +class ResponseUsage(BaseModel): + input_tokens: int = Field(..., description='The number of input tokens.') + input_tokens_details: InputTokensDetails1 = Field( + ..., description='A detailed breakdown of the input tokens.' ) - finish_reason: FinishReason = Field( - ..., - description='The reason the generation finished.\n\n- `SUCCESS` = successful generation.\n- `CONTENT_FILTERED` = successful generation, however the output violated our content moderation\npolicy and has been blurred as a result.', - examples=['SUCCESS'], + output_tokens: int = Field(..., description='The number of output tokens.') + output_tokens_details: OutputTokensDetails = Field( + ..., description='A detailed breakdown of the output tokens.' ) + total_tokens: int = Field(..., description='The total number of tokens used.') -class StabilityImageGenrationSD3Response400(BaseModel): - id: str = Field( - ..., - description='A unique identifier associated with this error. 
Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', - examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], - min_length=1, - ) - name: str = Field( - ..., - description='Short-hand name for an error, useful for discriminating between errors with the same status code.', - examples=['bad_request'], - min_length=1, - ) - errors: List[str] = Field( - ..., - description='One or more error messages indicating what went wrong.', - examples=[['some-field: is required']], - min_length=1, +class Rodin3DCheckStatusRequest(BaseModel): + subscription_key: str = Field( + ..., description='subscription from generate endpoint' ) -class StabilityImageGenrationSD3Response413(BaseModel): - id: str = Field( - ..., - description='A unique identifier associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', - examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], - min_length=1, - ) - name: str = Field( - ..., - description='Short-hand name for an error, useful for discriminating between errors with the same status code.', - examples=['bad_request'], - min_length=1, - ) - errors: List[str] = Field( - ..., - description='One or more error messages indicating what went wrong.', - examples=[['some-field: is required']], - min_length=1, - ) +class Rodin3DCheckStatusResponse(BaseModel): + pass -class StabilityImageGenrationSD3Response422(BaseModel): - id: str = Field( - ..., - description='A unique identifier associated with this error. 
Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', - examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], - min_length=1, - ) - name: str = Field( - ..., - description='Short-hand name for an error, useful for discriminating between errors with the same status code.', - examples=['bad_request'], - min_length=1, - ) - errors: List[str] = Field( - ..., - description='One or more error messages indicating what went wrong.', - examples=[['some-field: is required']], - min_length=1, - ) +class Rodin3DDownloadRequest(BaseModel): + task_uuid: str = Field(..., description='Task UUID') -class StabilityImageGenrationSD3Response429(BaseModel): - id: str = Field( - ..., - description='A unique identifier associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', - examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], - min_length=1, - ) - name: str = Field( - ..., - description='Short-hand name for an error, useful for discriminating between errors with the same status code.', - examples=['bad_request'], - min_length=1, - ) - errors: List[str] = Field( - ..., - description='One or more error messages indicating what went wrong.', - examples=[['some-field: is required']], - min_length=1, - ) +class RodinGenerateJobsData(BaseModel): + subscription_key: Optional[str] = Field(None, description='Subscription Key.') + uuids: Optional[List[str]] = Field(None, description='subjobs uuid.') -class StabilityImageGenrationSD3Response500(BaseModel): - id: str = Field( - ..., - description='A unique identifier associated with this error. 
Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', - examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], - min_length=1, - ) - name: str = Field( - ..., - description='Short-hand name for an error, useful for discriminating between errors with the same status code.', - examples=['bad_request'], - min_length=1, - ) - errors: List[str] = Field( - ..., - description='One or more error messages indicating what went wrong.', - examples=[['some-field: is required']], - min_length=1, - ) +class RodinMaterialType(str, Enum): + PBR = 'PBR' + Shaded = 'Shaded' -class OutputFormat4(str, Enum): - jpeg = 'jpeg' - png = 'png' - webp = 'webp' +class RodinMeshModeType(str, Enum): + Quad = 'Quad' + Raw = 'Raw' -class StabilityImageGenrationUpscaleConservativeRequest(BaseModel): - image: StrictBytes = Field( - ..., - description='The image you wish to upscale.\n\nSupported Formats:\n- jpeg\n- png\n- webp\n\nValidation Rules:\n- Every side must be at least 64 pixels\n- Total pixel count must be between 4,096 and 9,437,184 pixels\n- The aspect ratio must be between 1:2.5 and 2.5:1', - examples=['./some/image.png'], - ) - prompt: str = Field( - ..., - description="What you wish to see in the output image. A strong, descriptive prompt that clearly defines\nelements, colors, and subjects will lead to better results.\n\nTo control the weight of a given word use the format `(word:weight)`,\nwhere `word` is the word you'd like to control the weight of and `weight`\nis a value between 0 and 1. 
For example: `The sky was a crisp (blue:0.3) and (green:0.8)`\nwould convey a sky that was blue and green, but more green than blue.", - max_length=10000, - min_length=1, - ) - negative_prompt: Optional[str] = Field( - None, - description='A blurb of text describing what you **do not** wish to see in the output image.\nThis is an advanced feature.', - max_length=10000, - ) - seed: Optional[float] = Field( - 0, - description="A specific value that is used to guide the 'randomness' of the generation. (Omit this parameter or pass `0` to use a random seed.)", - ge=0.0, - le=4294967294.0, - ) - output_format: Optional[OutputFormat4] = Field( - 'png', description='Dictates the `content-type` of the generated image.' - ) - creativity: Optional[StabilityCreativity] = Field( - default_factory=lambda: StabilityCreativity.model_validate(0.35) - ) +class RodinQualityType(str, Enum): + extra_low = 'extra-low' + low = 'low' + medium = 'medium' + high = 'high' -class StabilityImageGenrationUpscaleConservativeResponse200(BaseModel): - image: str = Field( - ..., - description='The generated image, encoded to base64.', - examples=['AAAAIGZ0eXBpc29tAAACAGlzb21pc28yYXZjMW1...'], - ) - seed: Optional[float] = Field( - 0, - description='The seed used as random noise for this generation.', - examples=[343940597], - ge=0.0, - le=4294967294.0, - ) - finish_reason: FinishReason = Field( - ..., - description='The reason the generation finished.\n\n- `SUCCESS` = successful generation.\n- `CONTENT_FILTERED` = successful generation, however the output violated our content moderation\npolicy and has been blurred as a result.', - examples=['SUCCESS'], - ) +class RodinResourceItem(BaseModel): + name: Optional[str] = Field(None, description='File name') + url: Optional[str] = Field(None, description='Download url') -class StabilityImageGenrationUpscaleConservativeResponse400(BaseModel): - id: str = Field( - ..., - description='A unique identifier associated with this error. 
Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', - examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], - min_length=1, - ) - name: str = Field( - ..., - description='Short-hand name for an error, useful for discriminating between errors with the same status code.', - examples=['bad_request'], - min_length=1, - ) - errors: List[str] = Field( - ..., - description='One or more error messages indicating what went wrong.', - examples=[['some-field: is required']], - min_length=1, - ) +class RodinTierType(str, Enum): + Regular = 'Regular' + Sketch = 'Sketch' + Detail = 'Detail' + Smooth = 'Smooth' -class StabilityImageGenrationUpscaleConservativeResponse413(BaseModel): - id: str = Field( - ..., - description='A unique identifier associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', - examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], - min_length=1, - ) - name: str = Field( +class RunwayAspectRatioEnum(str, Enum): + field_1280_720 = '1280:720' + field_720_1280 = '720:1280' + field_1104_832 = '1104:832' + field_832_1104 = '832:1104' + field_960_960 = '960:960' + field_1584_672 = '1584:672' + field_1280_768 = '1280:768' + field_768_1280 = '768:1280' + + +class RunwayDurationEnum(int, Enum): + integer_5 = 5 + integer_10 = 10 + + +class RunwayImageToVideoResponse(BaseModel): + id: Optional[str] = Field(None, description='Task ID') + + +class RunwayModelEnum(str, Enum): + gen4_turbo = 'gen4_turbo' + gen3a_turbo = 'gen3a_turbo' + + +class Position(str, Enum): + first = 'first' + last = 'last' + + +class RunwayPromptImageDetailedObject(BaseModel): + position: Position = Field( ..., - description='Short-hand name for an error, useful for discriminating between errors with the same status 
code.', - examples=['bad_request'], - min_length=1, + description="The position of the image in the output video. 'last' is currently supported for gen3a_turbo only.", ) - errors: List[str] = Field( - ..., - description='One or more error messages indicating what went wrong.', - examples=[['some-field: is required']], - min_length=1, + uri: str = Field( + ..., description='A HTTPS URL or data URI containing an encoded image.' ) -class StabilityImageGenrationUpscaleConservativeResponse422(BaseModel): - id: str = Field( +class RunwayPromptImageObject( + RootModel[Union[str, List[RunwayPromptImageDetailedObject]]] +): + root: Union[str, List[RunwayPromptImageDetailedObject]] = Field( ..., - description='A unique identifier associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', - examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], - min_length=1, + description='Image(s) to use for the video generation. Can be a single URI or an array of image objects with positions.', ) - name: str = Field( - ..., - description='Short-hand name for an error, useful for discriminating between errors with the same status code.', - examples=['bad_request'], - min_length=1, + + +class RunwayTaskStatusEnum(str, Enum): + SUCCEEDED = 'SUCCEEDED' + RUNNING = 'RUNNING' + FAILED = 'FAILED' + PENDING = 'PENDING' + CANCELLED = 'CANCELLED' + THROTTLED = 'THROTTLED' + + +class RunwayTaskStatusResponse(BaseModel): + createdAt: datetime = Field(..., description='Task creation timestamp') + id: str = Field(..., description='Task ID') + output: Optional[List[str]] = Field(None, description='Array of output video URLs') + progress: Optional[float] = Field( + None, + description='Float value between 0 and 1 representing the progress of the task. 
Only available if status is RUNNING.', + ge=0.0, + le=1.0, ) - errors: List[str] = Field( - ..., - description='One or more error messages indicating what went wrong.', - examples=[['some-field: is required']], - min_length=1, + status: RunwayTaskStatusEnum + + +class RunwayTextToImageAspectRatioEnum(str, Enum): + field_1920_1080 = '1920:1080' + field_1080_1920 = '1080:1920' + field_1024_1024 = '1024:1024' + field_1360_768 = '1360:768' + field_1080_1080 = '1080:1080' + field_1168_880 = '1168:880' + field_1440_1080 = '1440:1080' + field_1080_1440 = '1080:1440' + field_1808_768 = '1808:768' + field_2112_912 = '2112:912' + +class Model4(str, Enum): + gen4_image = 'gen4_image' + + +class ReferenceImage(BaseModel): + uri: Optional[str] = Field( + None, description='A HTTPS URL or data URI containing an encoded image' ) -class StabilityImageGenrationUpscaleConservativeResponse429(BaseModel): - id: str = Field( - ..., - description='A unique identifier associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', - examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], - min_length=1, +class RunwayTextToImageRequest(BaseModel): + model: Model4 = Field(..., description='Model to use for generation') + promptText: str = Field( + ..., description='Text prompt for the image generation', max_length=1000 ) - name: str = Field( - ..., - description='Short-hand name for an error, useful for discriminating between errors with the same status code.', - examples=['bad_request'], - min_length=1, + ratio: RunwayTextToImageAspectRatioEnum + referenceImages: Optional[List[ReferenceImage]] = Field( + None, description='Array of reference images to guide the generation' ) + + +class RunwayTextToImageResponse(BaseModel): + id: Optional[str] = Field(None, description='Task ID') + + +class StabilityError(BaseModel): errors: List[str] = Field( ..., 
description='One or more error messages indicating what went wrong.', - examples=[['some-field: is required']], + examples=[[{'some-field': 'is required'}]], min_length=1, ) - - -class StabilityImageGenrationUpscaleConservativeResponse500(BaseModel): id: str = Field( ..., - description='A unique identifier associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', + description='A unique identifier associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new) you file, as it will greatly assist us in diagnosing the root cause of the problem.\n', examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], min_length=1, ) @@ -2703,412 +2217,518 @@ class StabilityImageGenrationUpscaleConservativeResponse500(BaseModel): examples=['bad_request'], min_length=1, ) - errors: List[str] = Field( - ..., - description='One or more error messages indicating what went wrong.', - examples=[['some-field: is required']], - min_length=1, - ) -class StabilityImageGenrationUpscaleCreativeRequest(BaseModel): - image: StrictBytes = Field( - ..., - description='The image you wish to upscale.\n\nSupported Formats:\n- jpeg\n- png\n- webp\n\nValidation Rules:\n- Every side must be at least 64 pixels\n- Total pixel count must be between 4,096 and 1,048,576 pixels', - examples=['./some/image.png'], - ) - prompt: str = Field( - ..., - description="What you wish to see in the output image. A strong, descriptive prompt that clearly defines\nelements, colors, and subjects will lead to better results.\n\nTo control the weight of a given word use the format `(word:weight)`,\nwhere `word` is the word you'd like to control the weight of and `weight`\nis a value between 0 and 1. 
For example: `The sky was a crisp (blue:0.3) and (green:0.8)`\nwould convey a sky that was blue and green, but more green than blue.", - max_length=10000, - min_length=1, - ) - negative_prompt: Optional[str] = Field( - None, - description='A blurb of text describing what you **do not** wish to see in the output image.\nThis is an advanced feature.', - max_length=10000, - ) - output_format: Optional[OutputFormat4] = Field( - 'png', description='Dictates the `content-type` of the generated image.' - ) - seed: Optional[float] = Field( - 0, - description="A specific value that is used to guide the 'randomness' of the generation. (Omit this parameter or pass `0` to use a random seed.)", - ge=0.0, - le=4294967294.0, - ) - creativity: Optional[float] = Field( - 0.3, - description='Indicates how creative the model should be when upscaling an image.\nHigher values will result in more details being added to the image during upscaling.', - ge=0.1, - le=0.5, - ) - style_preset: Optional[StylePreset] = Field( - None, description='Guides the image model towards a particular style.' +class Status9(str, Enum): + in_progress = 'in-progress' + + +class StabilityGetResultResponse202(BaseModel): + id: Optional[str] = Field( + None, description='The ID of the generation result.', examples=[1234567890] ) + status: Optional[Status9] = None -class StabilityImageGenrationUpscaleCreativeResponse200(BaseModel): - id: StabilityGenerationID +class Type20(str, Enum): + json_schema = 'json_schema' -class StabilityImageGenrationUpscaleCreativeResponse400(BaseModel): - id: str = Field( - ..., - description='A unique identifier associated with this error. 
Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', - examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], - min_length=1, +class TextResponseFormatJsonSchema(BaseModel): + description: Optional[str] = Field( + None, + description='A description of what the response format is for, used by the model to\ndetermine how to respond in the format.\n', ) name: str = Field( ..., - description='Short-hand name for an error, useful for discriminating between errors with the same status code.', - examples=['bad_request'], - min_length=1, + description='The name of the response format. Must be a-z, A-Z, 0-9, or contain\nunderscores and dashes, with a maximum length of 64.\n', ) - errors: List[str] = Field( + schema_: ResponseFormatJsonSchemaSchema = Field(..., alias='schema') + strict: Optional[bool] = Field( + False, + description='Whether to enable strict schema adherence when generating the output.\nIf set to true, the model will always follow the exact schema defined\nin the `schema` field. Only a subset of JSON Schema is supported when\n`strict` is `true`. To learn more, read the [Structured Outputs\nguide](/docs/guides/structured-outputs).\n', + ) + type: Type20 = Field( ..., - description='One or more error messages indicating what went wrong.', - examples=[['some-field: is required']], - min_length=1, + description='The type of response format being defined. Always `json_schema`.', ) -class StabilityImageGenrationUpscaleCreativeResponse413(BaseModel): - id: str = Field( - ..., - description='A unique identifier associated with this error. 
Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', - examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], - min_length=1, - ) - name: str = Field( - ..., - description='Short-hand name for an error, useful for discriminating between errors with the same status code.', - examples=['bad_request'], - min_length=1, +class Type21(str, Enum): + function = 'function' + + +class ToolChoiceFunction(BaseModel): + name: str = Field(..., description='The name of the function to call.') + type: Type21 = Field( + ..., description='For function calling, the type is always `function`.' ) - errors: List[str] = Field( + + +class ToolChoiceOptions(str, Enum): + none = 'none' + auto = 'auto' + required = 'required' + + +class Type22(str, Enum): + file_search = 'file_search' + web_search_preview = 'web_search_preview' + computer_use_preview = 'computer_use_preview' + web_search_preview_2025_03_11 = 'web_search_preview_2025_03_11' + + +class ToolChoiceTypes(BaseModel): + type: Type22 = Field( ..., - description='One or more error messages indicating what went wrong.', - examples=[['some-field: is required']], - min_length=1, + description='The type of hosted tool the model should to use. 
Learn more about\n[built-in tools](/docs/guides/tools).\n\nAllowed values are:\n- `file_search`\n- `web_search_preview`\n- `computer_use_preview`\n', ) -class StabilityImageGenrationUpscaleCreativeResponse422(BaseModel): - id: str = Field( +class TripoAnimation(str, Enum): + preset_idle = 'preset:idle' + preset_walk = 'preset:walk' + preset_climb = 'preset:climb' + preset_jump = 'preset:jump' + preset_run = 'preset:run' + preset_slash = 'preset:slash' + preset_shoot = 'preset:shoot' + preset_hurt = 'preset:hurt' + preset_fall = 'preset:fall' + preset_turn = 'preset:turn' + + +class TripoBalance(BaseModel): + balance: float + frozen: float + + +class TripoConvertFormat(str, Enum): + GLTF = 'GLTF' + USDZ = 'USDZ' + FBX = 'FBX' + OBJ = 'OBJ' + STL = 'STL' + field_3MF = '3MF' + + +class Code(int, Enum): + integer_1001 = 1001 + integer_2000 = 2000 + integer_2001 = 2001 + integer_2002 = 2002 + integer_2003 = 2003 + integer_2004 = 2004 + integer_2006 = 2006 + integer_2007 = 2007 + integer_2008 = 2008 + integer_2010 = 2010 + + +class TripoErrorResponse(BaseModel): + code: Code + message: str + suggestion: str + + +class TripoImageToModel(str, Enum): + image_to_model = 'image_to_model' + + +class TripoModelStyle(str, Enum): + person_person2cartoon = 'person:person2cartoon' + animal_venom = 'animal:venom' + object_clay = 'object:clay' + object_steampunk = 'object:steampunk' + object_christmas = 'object:christmas' + object_barbie = 'object:barbie' + gold = 'gold' + ancient_bronze = 'ancient_bronze' + + +class TripoModelVersion(str, Enum): + V2_5 = 'v2.5-20250123' + V2_0 = 'v2.0-20240919' + V1_4 = 'v1.4-20240625' + + +class TripoMultiviewMode(str, Enum): + LEFT = 'LEFT' + RIGHT = 'RIGHT' + + +class TripoMultiviewToModel(str, Enum): + multiview_to_model = 'multiview_to_model' + + +class TripoOrientation(str, Enum): + align_image = 'align_image' + default = 'default' + + +class TripoResponseSuccessCode(RootModel[int]): + root: int = Field( ..., - description='A unique identifier 
associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', - examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], - min_length=1, + description='Standard success code for Tripo API responses. Typically 0 for success.', + examples=[0], ) - name: str = Field( + + +class TripoSpec(str, Enum): + mixamo = 'mixamo' + tripo = 'tripo' + + +class TripoStandardFormat(str, Enum): + glb = 'glb' + fbx = 'fbx' + + +class TripoStylizeOptions(str, Enum): + lego = 'lego' + voxel = 'voxel' + voronoi = 'voronoi' + minecraft = 'minecraft' + + +class Code1(int, Enum): + integer_0 = 0 + + +class Data8(BaseModel): + task_id: str = Field(..., description='used for getTask') + + +class TripoSuccessTask(BaseModel): + code: Code1 + data: Data8 + + +class Topology(str, Enum): + bip = 'bip' + quad = 'quad' + + +class Output(BaseModel): + base_model: Optional[str] = None + model: Optional[str] = None + pbr_model: Optional[str] = None + rendered_image: Optional[str] = None + riggable: Optional[bool] = None + topology: Optional[Topology] = None + + +class Status10(str, Enum): + queued = 'queued' + running = 'running' + success = 'success' + failed = 'failed' + cancelled = 'cancelled' + unknown = 'unknown' + banned = 'banned' + expired = 'expired' + + +class TripoTask(BaseModel): + create_time: int + input: Dict[str, Any] + output: Output + progress: int = Field(..., ge=0, le=100) + status: Status10 + task_id: str + type: str + + +class TripoTextToModel(str, Enum): + text_to_model = 'text_to_model' + + +class TripoTextureAlignment(str, Enum): + original_image = 'original_image' + geometry = 'geometry' + + +class TripoTextureFormat(str, Enum): + BMP = 'BMP' + DPX = 'DPX' + HDR = 'HDR' + JPEG = 'JPEG' + OPEN_EXR = 'OPEN_EXR' + PNG = 'PNG' + TARGA = 'TARGA' + TIFF = 'TIFF' + WEBP = 'WEBP' + + +class TripoTextureQuality(str, Enum): + standard = 
'standard' + detailed = 'detailed' + + +class TripoTopology(str, Enum): + bip = 'bip' + quad = 'quad' + + +class TripoTypeAnimatePrerigcheck(str, Enum): + animate_prerigcheck = 'animate_prerigcheck' + + +class TripoTypeAnimateRetarget(str, Enum): + animate_retarget = 'animate_retarget' + + +class TripoTypeAnimateRig(str, Enum): + animate_rig = 'animate_rig' + + +class TripoTypeConvertModel(str, Enum): + convert_model = 'convert_model' + + +class TripoTypeRefineModel(str, Enum): + refine_model = 'refine_model' + + +class TripoTypeStylizeModel(str, Enum): + stylize_model = 'stylize_model' + + +class TripoTypeTextureModel(str, Enum): + texture_model = 'texture_model' + + +class Veo2GenVidPollRequest(BaseModel): + operationName: str = Field( ..., - description='Short-hand name for an error, useful for discriminating between errors with the same status code.', - examples=['bad_request'], - min_length=1, + description='Full operation name (from predict response)', + examples=[ + 'projects/PROJECT_ID/locations/us-central1/publishers/google/models/MODEL_ID/operations/OPERATION_ID' + ], ) - errors: List[str] = Field( - ..., - description='One or more error messages indicating what went wrong.', - examples=[['some-field: is required']], - min_length=1, + + +class Error(BaseModel): + code: Optional[int] = Field(None, description='Error code') + message: Optional[str] = Field(None, description='Error message') + + +class Video(BaseModel): + bytesBase64Encoded: Optional[str] = Field( + None, description='Base64-encoded video content' ) + gcsUri: Optional[str] = Field(None, description='Cloud Storage URI of the video') + mimeType: Optional[str] = Field(None, description='Video MIME type') -class StabilityImageGenrationUpscaleCreativeResponse429(BaseModel): - id: str = Field( - ..., - description='A unique identifier associated with this error. 
Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', - examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], - min_length=1, +class Response(BaseModel): + field_type: Optional[str] = Field( + None, + alias='@type', + examples=[ + 'type.googleapis.com/cloud.ai.large_models.vision.GenerateVideoResponse' + ], ) - name: str = Field( - ..., - description='Short-hand name for an error, useful for discriminating between errors with the same status code.', - examples=['bad_request'], - min_length=1, + raiMediaFilteredCount: Optional[int] = Field( + None, description='Count of media filtered by responsible AI policies' ) - errors: List[str] = Field( - ..., - description='One or more error messages indicating what went wrong.', - examples=[['some-field: is required']], - min_length=1, + raiMediaFilteredReasons: Optional[List[str]] = Field( + None, description='Reasons why media was filtered by responsible AI policies' ) + videos: Optional[List[Video]] = None -class StabilityImageGenrationUpscaleCreativeResponse500(BaseModel): - id: str = Field( - ..., - description='A unique identifier associated with this error. 
Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', - examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], - min_length=1, +class Veo2GenVidPollResponse(BaseModel): + done: Optional[bool] = None + error: Optional[Error] = Field( + None, description='Error details if operation failed' ) - name: str = Field( - ..., - description='Short-hand name for an error, useful for discriminating between errors with the same status code.', - examples=['bad_request'], - min_length=1, + name: Optional[str] = None + response: Optional[Response] = Field( + None, description='The actual prediction response if done is true' ) - errors: List[str] = Field( - ..., - description='One or more error messages indicating what went wrong.', - examples=[['some-field: is required']], - min_length=1, + + +class Image(BaseModel): + bytesBase64Encoded: str + gcsUri: Optional[str] = None + mimeType: Optional[str] = None + + +class Image1(BaseModel): + bytesBase64Encoded: Optional[str] = None + gcsUri: str + mimeType: Optional[str] = None + + +class Instance(BaseModel): + image: Optional[Union[Image, Image1]] = Field( + None, description='Optional image to guide video generation' + ) + prompt: str = Field(..., description='Text description of the video') + + +class PersonGeneration1(str, Enum): + ALLOW = 'ALLOW' + BLOCK = 'BLOCK' + + +class Parameters(BaseModel): + aspectRatio: Optional[str] = Field(None, examples=['16:9']) + durationSeconds: Optional[int] = None + enhancePrompt: Optional[bool] = None + negativePrompt: Optional[str] = None + personGeneration: Optional[PersonGeneration1] = None + sampleCount: Optional[int] = None + seed: Optional[int] = None + storageUri: Optional[str] = Field( + None, description='Optional Cloud Storage URI to upload the video' ) -class StabilityImageGenrationUpscaleFastRequest(BaseModel): - image: StrictBytes = Field( - ..., - 
description='The image you wish to upscale.\n\nSupported Formats:\n- jpeg\n- png\n- webp\n\nValidation Rules:\n- Width must be between 32 and 1,536 pixels\n- Height must be between 32 and 1,536 pixels\n- Total pixel count must be between 1,024 and 1,048,576 pixels', - examples=['./some/image.png'], - ) - output_format: Optional[OutputFormat4] = Field( - 'png', description='Dictates the `content-type` of the generated image.' - ) +class Veo2GenVidRequest(BaseModel): + instances: Optional[List[Instance]] = None + parameters: Optional[Parameters] = None -class StabilityImageGenrationUpscaleFastResponse200(BaseModel): - image: str = Field( - ..., - description='The generated image, encoded to base64.', - examples=['AAAAIGZ0eXBpc29tAAACAGlzb21pc28yYXZjMW1...'], - ) - seed: Optional[float] = Field( - 0, - description='The seed used as random noise for this generation.', - examples=[343940597], - ge=0.0, - le=4294967294.0, - ) - finish_reason: FinishReason = Field( +class Veo2GenVidResponse(BaseModel): + name: str = Field( ..., - description='The reason the generation finished.\n\n- `SUCCESS` = successful generation.\n- `CONTENT_FILTERED` = successful generation, however the output violated our content moderation\npolicy and has been blurred as a result.', - examples=['SUCCESS'], + description='Operation resource name', + examples=[ + 'projects/PROJECT_ID/locations/us-central1/publishers/google/models/MODEL_ID/operations/a1b07c8e-7b5a-4aba-bb34-3e1ccb8afcc8' + ], ) -class StabilityImageGenrationUpscaleFastResponse400(BaseModel): - id: str = Field( - ..., - description='A unique identifier associated with this error. 
Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', - examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], - min_length=1, - ) - name: str = Field( - ..., - description='Short-hand name for an error, useful for discriminating between errors with the same status code.', - examples=['bad_request'], - min_length=1, - ) - errors: List[str] = Field( - ..., - description='One or more error messages indicating what went wrong.', - examples=[['some-field: is required']], - min_length=1, - ) +class SearchContextSize(str, Enum): + low = 'low' + medium = 'medium' + high = 'high' -class StabilityImageGenrationUpscaleFastResponse413(BaseModel): - id: str = Field( - ..., - description='A unique identifier associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', - examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], - min_length=1, - ) - name: str = Field( - ..., - description='Short-hand name for an error, useful for discriminating between errors with the same status code.', - examples=['bad_request'], - min_length=1, - ) - errors: List[str] = Field( - ..., - description='One or more error messages indicating what went wrong.', - examples=[['some-field: is required']], - min_length=1, - ) +class Type23(str, Enum): + web_search_preview = 'web_search_preview' + web_search_preview_2025_03_11 = 'web_search_preview_2025_03_11' -class StabilityImageGenrationUpscaleFastResponse422(BaseModel): - id: str = Field( - ..., - description='A unique identifier associated with this error. 
Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', - examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], - min_length=1, - ) - name: str = Field( - ..., - description='Short-hand name for an error, useful for discriminating between errors with the same status code.', - examples=['bad_request'], - min_length=1, +class WebSearchPreviewTool(BaseModel): + search_context_size: Optional[SearchContextSize] = Field( + None, + description='High level guidance for the amount of context window space to use for the search. One of `low`, `medium`, or `high`. `medium` is the default.', ) - errors: List[str] = Field( + type: Literal['WebSearchPreviewTool'] = Field( ..., - description='One or more error messages indicating what went wrong.', - examples=[['some-field: is required']], - min_length=1, + description='The type of the web search tool. One of `web_search_preview` or `web_search_preview_2025_03_11`.', ) -class StabilityImageGenrationUpscaleFastResponse429(BaseModel): - id: str = Field( - ..., - description='A unique identifier associated with this error. 
Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', - examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], - min_length=1, - ) - name: str = Field( - ..., - description='Short-hand name for an error, useful for discriminating between errors with the same status code.', - examples=['bad_request'], - min_length=1, - ) - errors: List[str] = Field( - ..., - description='One or more error messages indicating what went wrong.', - examples=[['some-field: is required']], - min_length=1, - ) +class Status11(str, Enum): + in_progress = 'in_progress' + searching = 'searching' + completed = 'completed' + failed = 'failed' -class StabilityImageGenrationUpscaleFastResponse500(BaseModel): - id: str = Field( - ..., - description='A unique identifier associated with this error. Please include this in any [support tickets](https://kb.stability.ai/knowledge-base/kb-tickets/new)\nyou file, as it will greatly assist us in diagnosing the root cause of the problem.', - examples=['a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4'], - min_length=1, - ) - name: str = Field( - ..., - description='Short-hand name for an error, useful for discriminating between errors with the same status code.', - examples=['bad_request'], - min_length=1, +class Type24(str, Enum): + web_search_call = 'web_search_call' + + +class WebSearchToolCall(BaseModel): + id: str = Field(..., description='The unique ID of the web search tool call.\n') + status: Status11 = Field( + ..., description='The status of the web search tool call.\n' ) - errors: List[str] = Field( + type: Type24 = Field( ..., - description='One or more error messages indicating what went wrong.', - examples=[['some-field: is required']], - min_length=1, + description='The type of the web search tool call. 
Always `web_search_call`.\n', ) -class ActionJobResult(BaseModel): - id: Optional[UUID] = Field(None, description='Unique identifier for the job result') - workflow_name: Optional[str] = Field(None, description='Name of the workflow') - operating_system: Optional[str] = Field(None, description='Operating system used') - python_version: Optional[str] = Field(None, description='PyTorch version used') - pytorch_version: Optional[str] = Field(None, description='PyTorch version used') - action_run_id: Optional[str] = Field( - None, description='Identifier of the run this result belongs to' - ) - action_job_id: Optional[str] = Field( - None, description='Identifier of the job this result belongs to' - ) - cuda_version: Optional[str] = Field(None, description='CUDA version used') - branch_name: Optional[str] = Field( - None, description='Name of the relevant git branch' - ) - commit_hash: Optional[str] = Field(None, description='The hash of the commit') - commit_id: Optional[str] = Field(None, description='The ID of the commit') - commit_time: Optional[int] = Field( - None, description='The Unix timestamp when the commit was made' - ) - commit_message: Optional[str] = Field(None, description='The message of the commit') - comfy_run_flags: Optional[str] = Field( - None, description='The comfy run flags. E.g. `--low-vram`' - ) - git_repo: Optional[str] = Field(None, description='The repository name') - pr_number: Optional[str] = Field(None, description='The pull request number') - start_time: Optional[int] = Field( - None, description='The start time of the job as a Unix timestamp.' - ) - end_time: Optional[int] = Field( - None, description='The end time of the job as a Unix timestamp.' - ) - avg_vram: Optional[int] = Field( - None, description='The average VRAM used by the job' - ) - peak_vram: Optional[int] = Field(None, description='The peak VRAM used by the job') - job_trigger_user: Optional[str] = Field( - None, description='The user who triggered the job.' 
- ) - author: Optional[str] = Field(None, description='The author of the commit') - machine_stats: Optional[MachineStats] = None - status: Optional[WorkflowRunStatus] = None - storage_file: Optional[StorageFile] = None +class CreateModelResponseProperties(ModelResponseProperties): + pass -class Publisher(BaseModel): - name: Optional[str] = None - id: Optional[str] = Field( +class GeminiInlineData(BaseModel): + data: Optional[str] = Field( None, - description="The unique identifier for the publisher. It's akin to a username. Should be lowercase.", + description='The base64 encoding of the image, PDF, or video to include inline in the prompt. When including media inline, you must also specify the media type (mimeType) of the data. Size limit: 20MB\n', ) - description: Optional[str] = None - website: Optional[str] = None - support: Optional[str] = None - source_code_repo: Optional[str] = None - logo: Optional[str] = Field(None, description="URL to the publisher's logo.") - createdAt: Optional[datetime] = Field( - None, description='The date and time the publisher was created.' + mimeType: Optional[GeminiMimeType] = None + + +class GeminiPart(BaseModel): + inlineData: Optional[GeminiInlineData] = None + text: Optional[str] = Field( + None, + description='A text prompt or code snippet.', + examples=['Write a story about a robot learning to paint'], ) - members: Optional[List[PublisherMember]] = Field( - None, description='A list of members in the publisher.' + + +class GeminiPromptFeedback(BaseModel): + blockReason: Optional[str] = None + blockReasonMessage: Optional[str] = None + safetyRatings: Optional[List[GeminiSafetyRating]] = None + + +class GeminiSafetySetting(BaseModel): + category: GeminiSafetyCategory + threshold: GeminiSafetyThreshold + + +class GeminiSystemInstructionContent(BaseModel): + parts: List[GeminiTextPart] = Field( + ..., + description='A list of ordered parts that make up a single message. Different parts may have different IANA MIME types. 
For limits on the inputs, such as the maximum number of tokens or the number of images, see the model specifications on the Google models page.\n', ) - status: Optional[PublisherStatus] = Field( - None, description='The status of the publisher.' + role: Role1 = Field( + ..., + description='The identity of the entity that creates the message. The following values are supported: user: This indicates that the message is sent by a real person, typically a user-generated message. model: This indicates that the message is generated by the model. The model value is used to insert messages from the model into the conversation during multi-turn conversations. For non-multi-turn conversations, this field can be left blank or unset.\n', + examples=['user'], ) -class NodeVersion(BaseModel): - id: Optional[str] = None - version: Optional[str] = Field( +class IdeogramV3EditRequest(BaseModel): + color_palette: Optional[IdeogramColorPalette] = None + image: Optional[StrictBytes] = Field( None, - description='The version identifier, following semantic versioning. Must be unique for the node.', - ) - createdAt: Optional[datetime] = Field( - None, description='The date and time the version was created.' - ) - changelog: Optional[str] = Field( - None, description='Summary of changes made in this version' + description='The image being edited (max size 10MB); only JPEG, WebP and PNG formats are supported at this time.', ) - dependencies: Optional[List[str]] = Field( - None, description='A list of pip dependencies required by the node.' + magic_prompt: Optional[str] = Field( + None, + description='Determine if MagicPrompt should be used in generating the request or not.', ) - downloadUrl: Optional[str] = Field( - None, description='[Output Only] URL to download this version of the node' + mask: Optional[StrictBytes] = Field( + None, + description='A black and white image of the same size as the image being edited (max size 10MB). 
Black regions in the mask should match up with the regions of the image that you would like to edit; only JPEG, WebP and PNG formats are supported at this time.', ) - deprecated: Optional[bool] = Field( - None, description='Indicates if this version is deprecated.' + num_images: Optional[int] = Field( + None, description='The number of images to generate.' ) - status: Optional[NodeVersionStatus] = Field( - None, description='The status of the node version.' + prompt: str = Field( + ..., description='The prompt used to describe the edited result.' ) - status_reason: Optional[str] = Field( - None, description='The reason for the status change.' + rendering_speed: RenderingSpeed + seed: Optional[int] = Field( + None, description='Random seed. Set for reproducible generation.' ) - node_id: Optional[str] = Field( - None, description='The unique identifier of the node.' + style_codes: Optional[List[StyleCode]] = Field( + None, + description='A list of 8 character hexadecimal codes representing the style of the image. Cannot be used in conjunction with style_reference_images or style_type.', ) - comfy_node_extract_status: Optional[str] = Field( - None, description='The status of comfy node extraction process.' + style_reference_images: Optional[List[StrictBytes]] = Field( + None, + description='A set of images to use as style references (maximum total size 10MB across all style references). 
The images should be in JPEG, PNG or WebP format.', ) class IdeogramV3Request(BaseModel): - prompt: str = Field(..., description='The text prompt for image generation') - seed: Optional[int] = Field( - None, description='Seed value for reproducible generation' - ) - resolution: Optional[str] = Field( - None, description='Image resolution in format WxH', examples=['1280x800'] - ) aspect_ratio: Optional[str] = Field( None, description='Aspect ratio in format WxH', examples=['1x3'] ) - rendering_speed: RenderingSpeed - magic_prompt: Optional[MagicPrompt] = Field( + color_palette: Optional[ColorPalette] = None + magic_prompt: Optional[MagicPrompt2] = Field( None, description='Whether to enable magic prompt enhancement' ) negative_prompt: Optional[str] = Field( @@ -3117,83 +2737,88 @@ class IdeogramV3Request(BaseModel): num_images: Optional[int] = Field( None, description='Number of images to generate', ge=1 ) - color_palette: Optional[ColorPalette] = None + prompt: str = Field(..., description='The text prompt for image generation') + rendering_speed: RenderingSpeed + resolution: Optional[str] = Field( + None, description='Image resolution in format WxH', examples=['1280x800'] + ) + seed: Optional[int] = Field( + None, description='Seed value for reproducible generation' + ) style_codes: Optional[List[StyleCode]] = Field( None, description='Array of style codes in hexadecimal format' ) - style_type: Optional[StyleType] = Field( - None, description='The type of style to apply' - ) style_reference_images: Optional[List[str]] = Field( None, description='Array of reference image URLs or identifiers' ) + style_type: Optional[StyleType1] = Field( + None, description='The type of style to apply' + ) -class IdeogramV3EditRequest(BaseModel): - image: Optional[StrictBytes] = Field( - None, - description='The image being edited (max size 10MB); only JPEG, WebP and PNG formats are supported at this time.', - ) - mask: Optional[StrictBytes] = Field( - None, - description='A black 
and white image of the same size as the image being edited (max size 10MB). Black regions in the mask should match up with the regions of the image that you would like to edit; only JPEG, WebP and PNG formats are supported at this time.', - ) - prompt: str = Field( - ..., description='The prompt used to describe the edited result.' - ) - magic_prompt: Optional[str] = Field( - None, - description='Determine if MagicPrompt should be used in generating the request or not.', - ) - num_images: Optional[int] = Field( - None, description='The number of images to generate.' - ) - seed: Optional[int] = Field( - None, description='Random seed. Set for reproducible generation.' - ) - rendering_speed: RenderingSpeed - color_palette: Optional[IdeogramColorPalette] = Field( - None, - description='A color palette for generation, must EITHER be specified via one of the presets (name) or explicitly via hexadecimal representations of the color with optional weights (members). Not supported by V_1, V_1_TURBO, V_2A and V_2A_TURBO models.', - ) - style_codes: Optional[List[StyleCode]] = Field( - None, - description='A list of 8 character hexadecimal codes representing the style of the image. Cannot be used in conjunction with style_reference_images or style_type.', - ) - style_reference_images: Optional[List[StrictBytes]] = Field( - None, - description='A set of images to use as style references (maximum total size 10MB across all style references). 
The images should be in JPEG, PNG or WebP format.', +class ImagenGenerateImageResponse(BaseModel): + predictions: Optional[List[ImagenImagePrediction]] = None + + +class ImagenImageGenerationParameters(BaseModel): + addWatermark: Optional[bool] = None + aspectRatio: Optional[AspectRatio] = None + enhancePrompt: Optional[bool] = None + includeRaiReason: Optional[bool] = None + includeSafetyAttributes: Optional[bool] = None + outputOptions: Optional[ImagenOutputOptions] = None + personGeneration: Optional[PersonGeneration] = None + safetySetting: Optional[SafetySetting] = None + sampleCount: Optional[int] = Field(None, ge=1, le=4) + seed: Optional[int] = None + storageUri: Optional[AnyUrl] = None + + +class InputContent( + RootModel[Union[InputTextContent, InputImageContent, InputFileContent]] +): + root: Union[InputTextContent, InputImageContent, InputFileContent] + + +class InputMessageContentList(RootModel[List[InputContent]]): + root: List[InputContent] = Field( + ..., + description='A list of one or many input items to the model, containing different content \ntypes.\n', + title='Input item content list', ) class KlingCameraControl(BaseModel): - type: Optional[KlingCameraControlType] = None config: Optional[KlingCameraConfig] = None + type: Optional[KlingCameraControlType] = None -class KlingText2VideoRequest(BaseModel): - model_name: Optional[KlingVideoGenModelName] = 'kling-v2-master' - prompt: Optional[str] = Field( - None, description='Positive text prompt', max_length=2500 - ) - negative_prompt: Optional[str] = Field( - None, description='Negative text prompt', max_length=2500 +class KlingDualCharacterEffectInput(BaseModel): + duration: KlingVideoGenDuration + images: KlingDualCharacterImages + mode: Optional[KlingVideoGenMode] = 'std' + model_name: Optional[KlingCharacterEffectModelName] = 'kling-v1' + + +class KlingImage2VideoRequest(BaseModel): + aspect_ratio: Optional[KlingVideoGenAspectRatio] = '16:9' + callback_url: Optional[AnyUrl] = Field( + None, + 
description='The callback notification address. Server will notify when the task status changes.', ) + camera_control: Optional[KlingCameraControl] = None cfg_scale: Optional[KlingVideoGenCfgScale] = Field( default_factory=lambda: KlingVideoGenCfgScale.model_validate(0.5) ) - mode: Optional[KlingVideoGenMode] = 'std' - camera_control: Optional[KlingCameraControl] = None - aspect_ratio: Optional[KlingVideoGenAspectRatio] = '16:9' duration: Optional[KlingVideoGenDuration] = '5' - callback_url: Optional[AnyUrl] = Field( - None, description='The callback notification address' + dynamic_masks: Optional[List[DynamicMask]] = Field( + None, + description='Dynamic Brush Configuration List (up to 6 groups). For 5-second videos, trajectory length must not exceed 77 coordinates.', + ) + external_task_id: Optional[str] = Field( + None, + description='Customized Task ID. Must be unique within a single user account.', ) - external_task_id: Optional[str] = Field(None, description='Customized Task ID') - - -class KlingImage2VideoRequest(BaseModel): - model_name: Optional[KlingVideoGenModelName] = 'kling-v2-master' image: Optional[str] = Field( None, description='Reference Image - URL or Base64 encoded string, cannot exceed 10MB, resolution not less than 300*300px, aspect ratio between 1:2.5 ~ 2.5:1. Base64 should not include data:image prefix.', @@ -3202,434 +2827,317 @@ class KlingImage2VideoRequest(BaseModel): None, description='Reference Image - End frame control. URL or Base64 encoded string, cannot exceed 10MB, resolution not less than 300*300px. 
Base64 should not include data:image prefix.', ) - prompt: Optional[str] = Field( - None, description='Positive text prompt', max_length=2500 - ) + mode: Optional[KlingVideoGenMode] = 'std' + model_name: Optional[KlingVideoGenModelName] = 'kling-v2-master' negative_prompt: Optional[str] = Field( None, description='Negative text prompt', max_length=2500 ) - cfg_scale: Optional[KlingVideoGenCfgScale] = Field( - default_factory=lambda: KlingVideoGenCfgScale.model_validate(0.5) + prompt: Optional[str] = Field( + None, description='Positive text prompt', max_length=2500 ) - mode: Optional[KlingVideoGenMode] = 'std' static_mask: Optional[str] = Field( None, description='Static Brush Application Area (Mask image created by users using the motion brush). The aspect ratio must match the input image.', ) - dynamic_masks: Optional[List[DynamicMask]] = Field( - None, - description='Dynamic Brush Configuration List (up to 6 groups). For 5-second videos, trajectory length must not exceed 77 coordinates.', - ) - camera_control: Optional[KlingCameraControl] = None - aspect_ratio: Optional[KlingVideoGenAspectRatio] = '16:9' - duration: Optional[KlingVideoGenDuration] = '5' - callback_url: Optional[AnyUrl] = Field( - None, - description='The callback notification address. Server will notify when the task status changes.', - ) - external_task_id: Optional[str] = Field( - None, - description='Customized Task ID. 
Must be unique within a single user account.', - ) -class KlingVideoEffectsInput( - RootModel[Union[KlingSingleImageEffectInput, KlingDualCharacterEffectInput]] -): - root: Union[KlingSingleImageEffectInput, KlingDualCharacterEffectInput] +class TaskResult(BaseModel): + videos: Optional[List[KlingVideoResult]] = None -class StripeBillingDetails(BaseModel): - address: Optional[StripeAddress] = None - email: Optional[str] = None - name: Optional[str] = None - phone: Optional[str] = None - tax_id: Optional[Any] = None +class Data(BaseModel): + created_at: Optional[int] = Field(None, description='Task creation time') + task_id: Optional[str] = Field(None, description='Task ID') + task_info: Optional[TaskInfo] = None + task_result: Optional[TaskResult] = None + task_status: Optional[KlingTaskStatus] = None + updated_at: Optional[int] = Field(None, description='Task update time') -class StripePaymentMethodDetails(BaseModel): - card: Optional[StripeCardDetails] = None - type: Optional[str] = None +class KlingImage2VideoResponse(BaseModel): + code: Optional[int] = Field(None, description='Error code') + data: Optional[Data] = None + message: Optional[str] = Field(None, description='Error message') + request_id: Optional[str] = Field(None, description='Request ID') -class BFLFluxProFillInputs(BaseModel): - image: str = Field( - ..., - description='A Base64-encoded string representing the image you wish to modify. 
Can contain alpha mask if desired.', - title='Image', - ) - mask: Optional[str] = Field( +class TaskResult1(BaseModel): + images: Optional[List[KlingImageResult]] = None + + +class Data1(BaseModel): + created_at: Optional[int] = Field(None, description='Task creation time') + task_id: Optional[str] = Field(None, description='Task ID') + task_result: Optional[TaskResult1] = None + task_status: Optional[KlingTaskStatus] = None + task_status_msg: Optional[str] = Field(None, description='Task status information') + updated_at: Optional[int] = Field(None, description='Task update time') + + +class KlingImageGenerationsResponse(BaseModel): + code: Optional[int] = Field(None, description='Error code') + data: Optional[Data1] = None + message: Optional[str] = Field(None, description='Error message') + request_id: Optional[str] = Field(None, description='Request ID') + + +class KlingLipSyncInputObject(BaseModel): + audio_file: Optional[str] = Field( None, - description='A Base64-encoded string representing a mask for the areas you want to modify in the image. The mask should be the same dimensions as the image and in black and white. Black areas (0%) indicate no modification, while white areas (100%) specify areas for inpainting. Optional if you provide an alpha mask in the original image. Validation: The endpoint verifies that the dimensions of the mask match the original image.', - title='Mask', - ) - prompt: Optional[str] = Field( - '', - description='The description of the changes you want to make. This text guides the inpainting process, allowing you to specify features, styles, or modifications for the masked area.', - examples=['ein fantastisches bild'], - title='Prompt', - ) - steps: Optional[Steps] = Field( - default_factory=lambda: Steps.model_validate(50), - description='Number of steps for the image generation process', - examples=[50], - title='Steps', + description='Local Path of Audio File. Supported formats: .mp3/.wav/.m4a/.aac, maximum file size of 5MB. 
Base64 code.', ) - prompt_upsampling: Optional[bool] = Field( - False, - description='Whether to perform upsampling on the prompt. If active, automatically modifies the prompt for more creative generation', - title='Prompt Upsampling', + audio_type: Optional[KlingAudioUploadType] = None + audio_url: Optional[str] = Field( + None, + description='Audio File Download URL. Supported formats: .mp3/.wav/.m4a/.aac, maximum file size of 5MB.', ) - seed: Optional[int] = Field( - None, description='Optional seed for reproducibility', title='Seed' + mode: KlingLipSyncMode + text: Optional[str] = Field( + None, + description='Text Content for Lip-Sync Video Generation. Required when mode is text2video. Maximum length is 120 characters.', ) - guidance: Optional[Guidance] = Field( - default_factory=lambda: Guidance.model_validate(60), - description='Guidance strength for the image generation process', - title='Guidance', + video_id: Optional[str] = Field( + None, + description='The ID of the video generated by Kling AI. Only supports 5-second and 10-second videos generated within the last 30 days.', ) - output_format: Optional[BFLOutputFormat] = Field( - 'jpeg', - description="Output format for the generated image. Can be 'jpeg' or 'png'.", + video_url: Optional[str] = Field( + None, + description='Get link for uploaded video. Video files support .mp4/.mov, file size does not exceed 100MB, video length between 2-10s.', ) - safety_tolerance: Optional[int] = Field( - 2, - description='Tolerance level for input and output moderation. Between 0 and 6, 0 being most strict, 6 being least strict.', - examples=[2], - ge=0, - le=6, - title='Safety Tolerance', + voice_id: Optional[str] = Field( + None, + description='Voice ID. Required when mode is text2video. 
The system offers a variety of voice options to choose from.', ) - webhook_url: Optional[WebhookUrl] = Field( - None, description='URL to receive webhook notifications', title='Webhook Url' + voice_language: Optional[KlingLipSyncVoiceLanguage] = 'en' + voice_speed: Optional[float] = Field( + 1, + description='Speech Rate. Valid range: 0.8~2.0, accurate to one decimal place.', + ge=0.8, + le=2.0, ) - webhook_secret: Optional[str] = Field( + + +class KlingLipSyncRequest(BaseModel): + callback_url: Optional[AnyUrl] = Field( None, - description='Optional secret for webhook signature verification', - title='Webhook Secret', + description='The callback notification address. Server will notify when the task status changes.', ) + input: KlingLipSyncInputObject + + +class TaskResult2(BaseModel): + videos: Optional[List[KlingVideoResult]] = None -class BFLHTTPValidationError(BaseModel): - detail: Optional[List[BFLValidationError]] = Field(None, title='Detail') +class Data2(BaseModel): + created_at: Optional[int] = Field(None, description='Task creation time') + task_id: Optional[str] = Field(None, description='Task ID') + task_info: Optional[TaskInfo] = None + task_result: Optional[TaskResult2] = None + task_status: Optional[KlingTaskStatus] = None + updated_at: Optional[int] = Field(None, description='Task update time') + + +class KlingLipSyncResponse(BaseModel): + code: Optional[int] = Field(None, description='Error code') + data: Optional[Data2] = None + message: Optional[str] = Field(None, description='Error message') + request_id: Optional[str] = Field(None, description='Request ID') -class BFLFluxProExpandInputs(BaseModel): +class KlingSingleImageEffectInput(BaseModel): + duration: KlingSingleImageEffectDuration image: str = Field( ..., - description='A Base64-encoded string representing the image you wish to expand.', - title='Image', - ) - top: Optional[Top] = Field( - 0, description='Number of pixels to expand at the top of the image', title='Top' + 
description='Reference Image. URL or Base64 encoded string (without data:image prefix). File size cannot exceed 10MB, resolution not less than 300*300px, aspect ratio between 1:2.5 ~ 2.5:1.', ) - bottom: Optional[Bottom] = Field( - 0, - description='Number of pixels to expand at the bottom of the image', - title='Bottom', + model_name: KlingSingleImageEffectModelName + + +class KlingText2VideoRequest(BaseModel): + aspect_ratio: Optional[KlingVideoGenAspectRatio] = '16:9' + callback_url: Optional[AnyUrl] = Field( + None, description='The callback notification address' ) - left: Optional[Left] = Field( - 0, - description='Number of pixels to expand on the left side of the image', - title='Left', + camera_control: Optional[KlingCameraControl] = None + cfg_scale: Optional[KlingVideoGenCfgScale] = Field( + default_factory=lambda: KlingVideoGenCfgScale.model_validate(0.5) ) - right: Optional[Right] = Field( - 0, - description='Number of pixels to expand on the right side of the image', - title='Right', + duration: Optional[KlingVideoGenDuration] = '5' + external_task_id: Optional[str] = Field(None, description='Customized Task ID') + mode: Optional[KlingVideoGenMode] = 'std' + model_name: Optional[KlingTextToVideoModelName] = 'kling-v1' + negative_prompt: Optional[str] = Field( + None, description='Negative text prompt', max_length=2500 ) prompt: Optional[str] = Field( - '', - description='The description of the changes you want to make. This text guides the expansion process, allowing you to specify features, styles, or modifications for the expanded areas.', - examples=['ein fantastisches bild'], - title='Prompt', - ) - steps: Optional[Steps] = Field( - default_factory=lambda: Steps.model_validate(50), - description='Number of steps for the image generation process', - examples=[50], - title='Steps', - ) - prompt_upsampling: Optional[bool] = Field( - False, - description='Whether to perform upsampling on the prompt. 
If active, automatically modifies the prompt for more creative generation', - title='Prompt Upsampling', - ) - seed: Optional[int] = Field( - None, description='Optional seed for reproducibility', title='Seed' - ) - guidance: Optional[Guidance] = Field( - default_factory=lambda: Guidance.model_validate(60), - description='Guidance strength for the image generation process', - title='Guidance', - ) - output_format: Optional[BFLOutputFormat] = Field( - 'jpeg', - description="Output format for the generated image. Can be 'jpeg' or 'png'.", - ) - safety_tolerance: Optional[int] = Field( - 2, - description='Tolerance level for input and output moderation. Between 0 and 6, 0 being most strict, 6 being least strict.', - examples=[2], - ge=0, - le=6, - title='Safety Tolerance', - ) - webhook_url: Optional[WebhookUrl] = Field( - None, description='URL to receive webhook notifications', title='Webhook Url' - ) - webhook_secret: Optional[str] = Field( - None, - description='Optional secret for webhook signature verification', - title='Webhook Secret', + None, description='Positive text prompt', max_length=2500 ) -class BFLCannyInputs(BaseModel): - prompt: str = Field( - ..., - description='Text prompt for image generation', - examples=['ein fantastisches bild'], - title='Prompt', - ) - control_image: Optional[str] = Field( - None, - description='Base64 encoded image to use as control input if no preprocessed image is provided', - title='Control Image', - ) - preprocessed_image: Optional[str] = Field( - None, - description='Optional pre-processed image that will bypass the control preprocessing step', - title='Preprocessed Image', - ) - canny_low_threshold: Optional[CannyLowThreshold] = Field( - default_factory=lambda: CannyLowThreshold.model_validate(50), - description='Low threshold for Canny edge detection', - title='Canny Low Threshold', - ) - canny_high_threshold: Optional[CannyHighThreshold] = Field( - default_factory=lambda: CannyHighThreshold.model_validate(200), - 
description='High threshold for Canny edge detection', - title='Canny High Threshold', - ) - prompt_upsampling: Optional[bool] = Field( - False, - description='Whether to perform upsampling on the prompt', - title='Prompt Upsampling', - ) - seed: Optional[int] = Field( - None, - description='Optional seed for reproducibility', - examples=[42], - title='Seed', - ) - steps: Optional[Steps2] = Field( - default_factory=lambda: Steps2.model_validate(50), - description='Number of steps for the image generation process', - title='Steps', - ) - output_format: Optional[BFLOutputFormat] = Field( - 'jpeg', - description="Output format for the generated image. Can be 'jpeg' or 'png'.", - ) - guidance: Optional[Guidance2] = Field( - default_factory=lambda: Guidance2.model_validate(30), - description='Guidance strength for the image generation process', - title='Guidance', - ) - safety_tolerance: Optional[int] = Field( - 2, - description='Tolerance level for input and output moderation. Between 0 and 6, 0 being most strict, 6 being least strict.', - ge=0, - le=6, - title='Safety Tolerance', - ) - webhook_url: Optional[WebhookUrl] = Field( - None, description='URL to receive webhook notifications', title='Webhook Url' - ) - webhook_secret: Optional[str] = Field( - None, - description='Optional secret for webhook signature verification', - title='Webhook Secret', - ) +class Data4(BaseModel): + created_at: Optional[int] = Field(None, description='Task creation time') + task_id: Optional[str] = Field(None, description='Task ID') + task_info: Optional[TaskInfo] = None + task_result: Optional[TaskResult2] = None + task_status: Optional[KlingTaskStatus] = None + updated_at: Optional[int] = Field(None, description='Task update time') -class BFLDepthInputs(BaseModel): - prompt: str = Field( - ..., - description='Text prompt for image generation', - examples=['ein fantastisches bild'], - title='Prompt', - ) - control_image: Optional[str] = Field( - None, - description='Base64 encoded 
image to use as control input', - title='Control Image', - ) - preprocessed_image: Optional[str] = Field( - None, - description='Optional pre-processed image that will bypass the control preprocessing step', - title='Preprocessed Image', - ) - prompt_upsampling: Optional[bool] = Field( - False, - description='Whether to perform upsampling on the prompt', - title='Prompt Upsampling', - ) - seed: Optional[int] = Field( +class KlingText2VideoResponse(BaseModel): + code: Optional[int] = Field(None, description='Error code') + data: Optional[Data4] = None + message: Optional[str] = Field(None, description='Error message') + request_id: Optional[str] = Field(None, description='Request ID') + + +class KlingVideoEffectsInput( + RootModel[Union[KlingSingleImageEffectInput, KlingDualCharacterEffectInput]] +): + root: Union[KlingSingleImageEffectInput, KlingDualCharacterEffectInput] + + +class KlingVideoEffectsRequest(BaseModel): + callback_url: Optional[AnyUrl] = Field( None, - description='Optional seed for reproducibility', - examples=[42], - title='Seed', - ) - steps: Optional[Steps2] = Field( - default_factory=lambda: Steps2.model_validate(50), - description='Number of steps for the image generation process', - title='Steps', - ) - output_format: Optional[BFLOutputFormat] = Field( - 'jpeg', - description="Output format for the generated image. Can be 'jpeg' or 'png'.", - ) - guidance: Optional[Guidance2] = Field( - default_factory=lambda: Guidance2.model_validate(15), - description='Guidance strength for the image generation process', - title='Guidance', - ) - safety_tolerance: Optional[int] = Field( - 2, - description='Tolerance level for input and output moderation. 
Between 0 and 6, 0 being most strict, 6 being least strict.', - ge=0, - le=6, - title='Safety Tolerance', - ) - webhook_url: Optional[WebhookUrl] = Field( - None, description='URL to receive webhook notifications', title='Webhook Url' + description='The callback notification address for the result of this task.', ) - webhook_secret: Optional[str] = Field( + effect_scene: Union[KlingDualCharacterEffectsScene, KlingSingleImageEffectsScene] + external_task_id: Optional[str] = Field( None, - description='Optional secret for webhook signature verification', - title='Webhook Secret', + description='Customized Task ID. Must be unique within a single user account.', ) + input: KlingVideoEffectsInput -class Controls(BaseModel): - artistic_level: Optional[int] = Field( - None, - description='Defines artistic tone of your image. At a simple level, the person looks straight at the camera in a static and clean style. Dynamic and eccentric levels introduce movement and creativity.', - ge=0, - le=5, - ) - colors: Optional[List[RGBColor]] = Field( - None, description='An array of preferable colors' - ) - background_color: Optional[RGBColor] = Field( - None, description='Use given color as a desired background color' - ) - no_text: Optional[bool] = Field(None, description='Do not embed text layouts') +class Data5(BaseModel): + created_at: Optional[int] = Field(None, description='Task creation time') + task_id: Optional[str] = Field(None, description='Task ID') + task_info: Optional[TaskInfo] = None + task_result: Optional[TaskResult2] = None + task_status: Optional[KlingTaskStatus] = None + updated_at: Optional[int] = Field(None, description='Task update time') -class RecraftImageGenerationRequest(BaseModel): - prompt: str = Field( - ..., description='The text prompt describing the image to generate' +class KlingVideoEffectsResponse(BaseModel): + code: Optional[int] = Field(None, description='Error code') + data: Optional[Data5] = None + message: Optional[str] = Field(None, 
description='Error message') + request_id: Optional[str] = Field(None, description='Request ID') + + +class KlingVideoExtendRequest(BaseModel): + callback_url: Optional[AnyUrl] = Field( + None, + description='The callback notification address. Server will notify when the task status changes.', ) - model: str = Field( - ..., description='The model to use for generation (e.g., "recraftv3")' + cfg_scale: Optional[KlingVideoGenCfgScale] = Field( + default_factory=lambda: KlingVideoGenCfgScale.model_validate(0.5) ) - style: Optional[str] = Field( + negative_prompt: Optional[str] = Field( None, - description='The style to apply to the generated image (e.g., "digital_illustration")', + description='Negative text prompt for elements to avoid in the extended video', + max_length=2500, ) - style_id: Optional[str] = Field( + prompt: Optional[str] = Field( None, - description='The style ID to apply to the generated image (e.g., "123e4567-e89b-12d3-a456-426614174000"). If style_id is provided, style should not be provided.', - ) - size: str = Field( - ..., description='The size of the generated image (e.g., "1024x1024")' + description='Positive text prompt for guiding the video extension', + max_length=2500, ) - controls: Optional[Controls] = Field( - None, description='The controls for the generated image' + video_id: Optional[str] = Field( + None, + description='The ID of the video to be extended. Supports videos generated by text-to-video, image-to-video, and previous video extension operations. 
Cannot exceed 3 minutes total duration after extension.', ) - n: int = Field(..., description='The number of images to generate', ge=1, le=4) -class LumaKeyframes(BaseModel): - frame0: Optional[LumaKeyframe] = None - frame1: Optional[LumaKeyframe] = None +class Data6(BaseModel): + created_at: Optional[int] = Field(None, description='Task creation time') + task_id: Optional[str] = Field(None, description='Task ID') + task_info: Optional[TaskInfo] = None + task_result: Optional[TaskResult2] = None + task_status: Optional[KlingTaskStatus] = None + updated_at: Optional[int] = Field(None, description='Task update time') + + +class KlingVideoExtendResponse(BaseModel): + code: Optional[int] = Field(None, description='Error code') + data: Optional[Data6] = None + message: Optional[str] = Field(None, description='Error message') + request_id: Optional[str] = Field(None, description='Request ID') class LumaGenerationRequest(BaseModel): - generation_type: Optional[GenerationType] = 'video' - prompt: str = Field(..., description='The prompt of the generation') aspect_ratio: LumaAspectRatio - loop: Optional[bool] = Field(None, description='Whether to loop the video') - keyframes: Optional[LumaKeyframes] = None callback_url: Optional[AnyUrl] = Field( None, description='The callback URL of the generation, a POST request with Generation object will be sent to the callback URL when the generation is dreaming, completed, or failed', ) + duration: LumaVideoModelOutputDuration + generation_type: Optional[GenerationType1] = 'video' + keyframes: Optional[LumaKeyframes] = None + loop: Optional[bool] = Field(None, description='Whether to loop the video') model: LumaVideoModel + prompt: str = Field(..., description='The prompt of the generation') resolution: LumaVideoModelOutputResolution - duration: LumaVideoModelOutputDuration -class LumaGeneration(BaseModel): - id: Optional[UUID] = Field(None, description='The ID of the generation') - generation_type: Optional[LumaGenerationType] = None 
- state: Optional[LumaState] = None - failure_reason: Optional[str] = Field( - None, description='The reason for the state of the generation' - ) - created_at: Optional[datetime] = Field( - None, description='The date and time when the generation was created' - ) - assets: Optional[LumaAssets] = None - model: Optional[str] = Field(None, description='The model used for the generation') - request: Optional[ - Union[ - LumaGenerationRequest, - LumaImageGenerationRequest, - LumaUpscaleVideoGenerationRequest, - LumaAudioGenerationRequest, - ] - ] = Field(None, description='The request of the generation') +class CharacterRef(BaseModel): + identity0: Optional[LumaImageIdentity] = None -class RunwayImageToVideoRequest(BaseModel): - promptImage: RunwayPromptImageObject - seed: int = Field( - ..., description='Random seed for generation', ge=0, le=4294967295 - ) - model: RunwayModelEnum = Field(..., description='Model to use for generation') - promptText: Optional[str] = Field( - None, description='Text prompt for the generation', max_length=1000 - ) - duration: RunwayDurationEnum = Field( - ..., description='The number of seconds of duration for the output video.' +class LumaImageGenerationRequest(BaseModel): + aspect_ratio: Optional[LumaAspectRatio] = '16:9' + callback_url: Optional[AnyUrl] = Field( + None, description='The callback URL for the generation' ) - ratio: RunwayAspectRatioEnum = Field( - ..., - description='The resolution (aspect ratio) of the output video. Allowable values depend on the selected model. 
1280:768 and 768:1280 are only supported for gen3a_turbo.', + character_ref: Optional[CharacterRef] = None + generation_type: Optional[GenerationType2] = 'image' + image_ref: Optional[List[LumaImageRef]] = None + model: Optional[LumaImageModel] = 'photon-1' + modify_image_ref: Optional[LumaModifyImageRef] = None + prompt: Optional[str] = Field(None, description='The prompt of the generation') + style_ref: Optional[List[LumaImageRef]] = None + + +class LumaUpscaleVideoGenerationRequest(BaseModel): + callback_url: Optional[AnyUrl] = Field( + None, description='The callback URL for the upscale' ) + generation_type: Optional[GenerationType3] = 'upscale_video' + resolution: Optional[LumaVideoModelOutputResolution] = None -class RunwayTaskStatusResponse(BaseModel): - id: Optional[str] = Field(None, description='Task ID') - status: Optional[RunwayTaskStatusEnum] = Field(None, description='Task status') - createdAt: Optional[datetime] = Field(None, description='Task creation timestamp') - output: Optional[List[str]] = Field(None, description='Array of output video URLs') +class OutputContent(RootModel[Union[OutputTextContent, OutputAudioContent]]): + root: Union[OutputTextContent, OutputAudioContent] -class PikaHTTPValidationError(BaseModel): - detail: Optional[List[PikaValidationError]] = Field(None, title='Detail') +class OutputMessage(BaseModel): + content: List[OutputContent] = Field(..., description='The content of the message') + role: Role4 = Field(..., description='The role of the message') + type: Type14 = Field(..., description='The type of output item') -class PikaBodyGenerate22T2vGenerate22T2vPost(BaseModel): - promptText: str = Field(..., title='Prompttext') +class PikaBodyGenerate22I2vGenerate22I2vPost(BaseModel): + duration: Optional[PikaDurationEnum] = 5 + image: Optional[StrictBytes] = Field(None, title='Image') + negativePrompt: Optional[str] = Field(None, title='Negativeprompt') + promptText: Optional[str] = Field(None, title='Prompttext') + resolution: 
Optional[PikaResolutionEnum] = '1080p' + seed: Optional[int] = Field(None, title='Seed') + + +class PikaBodyGenerate22KeyframeGenerate22PikaframesPost(BaseModel): + duration: Optional[int] = Field(None, ge=5, le=10, title='Duration') + keyFrames: Optional[List[StrictBytes]] = Field( + None, description='Array of keyframe images', title='Keyframes' + ) negativePrompt: Optional[str] = Field(None, title='Negativeprompt') + promptText: str = Field(..., title='Prompttext') + resolution: Optional[PikaResolutionEnum] = '1080p' seed: Optional[int] = Field(None, title='Seed') - resolution: Optional[PikaResolutionEnum] = Field('1080p', title='Resolution') - duration: Optional[PikaDurationEnum] = Field(5, title='Duration') + + +class PikaBodyGenerate22T2vGenerate22T2vPost(BaseModel): aspectRatio: Optional[float] = Field( 1.7777777777777777, description='Aspect ratio (width / height)', @@ -3637,193 +3145,309 @@ class PikaBodyGenerate22T2vGenerate22T2vPost(BaseModel): le=2.5, title='Aspectratio', ) + duration: Optional[PikaDurationEnum] = 5 + negativePrompt: Optional[str] = Field(None, title='Negativeprompt') + promptText: str = Field(..., title='Prompttext') + resolution: Optional[PikaResolutionEnum] = '1080p' + seed: Optional[int] = Field(None, title='Seed') -class PikaBodyGenerate22I2vGenerate22I2vPost(BaseModel): +class PikaBodyGeneratePikaffectsGeneratePikaffectsPost(BaseModel): image: Optional[StrictBytes] = Field(None, title='Image') - promptText: Optional[str] = Field(None, title='Prompttext') negativePrompt: Optional[str] = Field(None, title='Negativeprompt') + pikaffect: Optional[Pikaffect] = None + promptText: Optional[str] = Field(None, title='Prompttext') seed: Optional[int] = Field(None, title='Seed') - resolution: Optional[PikaResolutionEnum] = Field('1080p', title='Resolution') - duration: Optional[PikaDurationEnum] = Field(5, title='Duration') -class PikaBodyGenerate22KeyframeGenerate22PikaframesPost(BaseModel): - keyFrames: Optional[List[StrictBytes]] = Field( 
- None, description='Array of keyframe images', title='Keyframes' +class PikaHTTPValidationError(BaseModel): + detail: Optional[List[PikaValidationError]] = Field(None, title='Detail') + + +class Reasoning(BaseModel): + effort: Optional[ReasoningEffort] = 'medium' + generate_summary: Optional[GenerateSummary] = Field( + None, + description="**Deprecated:** use `summary` instead.\n\nA summary of the reasoning performed by the model. This can be\nuseful for debugging and understanding the model's reasoning process.\nOne of `auto`, `concise`, or `detailed`.\n", + ) + summary: Optional[Summary] = Field( + None, + description="A summary of the reasoning performed by the model. This can be\nuseful for debugging and understanding the model's reasoning process.\nOne of `auto`, `concise`, or `detailed`.\n", ) - promptText: str = Field(..., title='Prompttext') - negativePrompt: Optional[str] = Field(None, title='Negativeprompt') - seed: Optional[int] = Field(None, title='Seed') - resolution: Optional[PikaResolutionEnum] = Field('1080p', title='Resolution') - duration: Optional[int] = Field(None, ge=5, le=10, title='Duration') -class PikaVideoResponse(BaseModel): - id: str = Field(..., title='Id') - status: PikaStatusEnum = Field( - ..., description='The status of the video', title='Status' +class ResponseError(BaseModel): + code: ResponseErrorCode + message: str = Field(..., description='A human-readable description of the error.') + + +class Rodin3DDownloadResponse(BaseModel): + list: Optional[RodinResourceItem] = None + + +class Rodin3DGenerateRequest(BaseModel): + images: str = Field(..., description='The reference images to generate 3D Assets.') + material: Optional[RodinMaterialType] = None + mesh_mode: Optional[RodinMeshModeType] = None + quality: Optional[RodinQualityType] = None + seed: Optional[int] = Field(None, description='Seed.') + tier: Optional[RodinTierType] = None + + +class Rodin3DGenerateResponse(BaseModel): + jobs: Optional[RodinGenerateJobsData] = None + 
message: Optional[str] = Field(None, description='message') + prompt: Optional[str] = Field(None, description='prompt') + submit_time: Optional[str] = Field(None, description='Time') + uuid: Optional[str] = Field(None, description='Task UUID') + + +class RunwayImageToVideoRequest(BaseModel): + duration: RunwayDurationEnum + model: RunwayModelEnum + promptImage: RunwayPromptImageObject + promptText: Optional[str] = Field( + None, description='Text prompt for the generation', max_length=1000 + ) + ratio: RunwayAspectRatioEnum + seed: int = Field( + ..., description='Random seed for generation', ge=0, le=4294967295 ) - url: Optional[str] = Field(None, title='Url') - progress: Optional[int] = Field(None, title='Progress') -class Node(BaseModel): - id: Optional[str] = Field(None, description='The unique identifier of the node.') - name: Optional[str] = Field(None, description='The display name of the node.') - category: Optional[str] = Field(None, description='The category of the node.') - description: Optional[str] = None - author: Optional[str] = None - license: Optional[str] = Field( - None, description="The path to the LICENSE file in the node's repository." +class TextResponseFormatConfiguration( + RootModel[ + Union[ + ResponseFormatText, TextResponseFormatJsonSchema, ResponseFormatJsonObject + ] + ] +): + root: Union[ + ResponseFormatText, TextResponseFormatJsonSchema, ResponseFormatJsonObject + ] = Field( + ..., + description='An object specifying the format that the model must output.\n\nConfiguring `{ "type": "json_schema" }` enables Structured Outputs, \nwhich ensures the model will match your supplied JSON schema. Learn more in the \n[Structured Outputs guide](/docs/guides/structured-outputs).\n\nThe default format is `{ "type": "text" }` with no additional options.\n\n**Not recommended for gpt-4o and newer models:**\n\nSetting to `{ "type": "json_object" }` enables the older JSON mode, which\nensures the message the model generates is valid JSON. 
Using `json_schema`\nis preferred for models that support it.\n', + ) + + +class Tool( + RootModel[ + Union[ + FileSearchTool, FunctionTool, WebSearchPreviewTool, ComputerUsePreviewTool + ] + ] +): + root: Union[ + FileSearchTool, FunctionTool, WebSearchPreviewTool, ComputerUsePreviewTool + ] = Field(..., discriminator='type') + + +class EasyInputMessage(BaseModel): + content: Union[str, InputMessageContentList] = Field( + ..., + description='Text, image, or audio input to the model, used to generate a response.\nCan also contain previous assistant responses.\n', ) - icon: Optional[str] = Field(None, description="URL to the node's icon.") - repository: Optional[str] = Field(None, description="URL to the node's repository.") - tags: Optional[List[str]] = None - latest_version: Optional[NodeVersion] = Field( - None, description='The latest version of the node.' + role: Role = Field( + ..., + description='The role of the message input. One of `user`, `assistant`, `system`, or\n`developer`.\n', ) - rating: Optional[float] = Field(None, description='The average rating of the node.') - downloads: Optional[int] = Field( - None, description='The number of downloads of the node.' + type: Optional[Type2] = Field( + None, description='The type of the message input. Always `message`.\n' ) - publisher: Optional[Publisher] = Field( - None, description='The publisher of the node.' 
+ + +class GeminiContent(BaseModel): + parts: List[GeminiPart] + role: Role1 = Field(..., examples=['user']) + + +class GeminiGenerateContentRequest(BaseModel): + contents: List[GeminiContent] + generationConfig: Optional[GeminiGenerationConfig] = None + safetySettings: Optional[List[GeminiSafetySetting]] = None + systemInstruction: Optional[GeminiSystemInstructionContent] = None + tools: Optional[List[GeminiTool]] = None + videoMetadata: Optional[GeminiVideoMetadata] = None + + +class ImagenGenerateImageRequest(BaseModel): + instances: List[ImagenImageGenerationInstance] + parameters: ImagenImageGenerationParameters + + +class InputMessage(BaseModel): + content: Optional[InputMessageContentList] = None + role: Optional[Role3] = None + status: Optional[Status2] = None + type: Optional[Type9] = None + + +class Item( + RootModel[ + Union[ + InputMessage, + OutputMessage, + FileSearchToolCall, + ComputerToolCall, + WebSearchToolCall, + FunctionToolCall, + ReasoningItem, + ] + ] +): + root: Union[ + InputMessage, + OutputMessage, + FileSearchToolCall, + ComputerToolCall, + WebSearchToolCall, + FunctionToolCall, + ReasoningItem, + ] = Field(..., description='Content item used to generate a response.\n') + + +class LumaGeneration(BaseModel): + assets: Optional[LumaAssets] = None + created_at: Optional[datetime] = Field( + None, description='The date and time when the generation was created' ) - status: Optional[NodeStatus] = Field(None, description='The status of the node.') - status_detail: Optional[str] = Field( - None, description='The status detail of the node.' 
+ failure_reason: Optional[str] = Field( + None, description='The reason for the state of the generation' ) - translations: Optional[Dict[str, Dict[str, Any]]] = None + generation_type: Optional[LumaGenerationType] = None + id: Optional[UUID] = Field(None, description='The ID of the generation') + model: Optional[str] = Field(None, description='The model used for the generation') + request: Optional[ + Union[ + LumaGenerationRequest, + LumaImageGenerationRequest, + LumaUpscaleVideoGenerationRequest, + LumaAudioGenerationRequest, + ] + ] = Field(None, description='The request of the generation') + state: Optional[LumaState] = None -class KlingVideoEffectsRequest(BaseModel): - effect_scene: Union[KlingDualCharacterEffectsScene, KlingSingleImageEffectsScene] - input: KlingVideoEffectsInput - callback_url: Optional[AnyUrl] = Field( +class OutputItem( + RootModel[ + Union[ + OutputMessage, + FileSearchToolCall, + FunctionToolCall, + WebSearchToolCall, + ComputerToolCall, + ReasoningItem, + ] + ] +): + root: Union[ + OutputMessage, + FileSearchToolCall, + FunctionToolCall, + WebSearchToolCall, + ComputerToolCall, + ReasoningItem, + ] + + +class Text(BaseModel): + format: Optional[TextResponseFormatConfiguration] = None + + +class ResponseProperties(BaseModel): + instructions: Optional[str] = Field( None, - description='The callback notification address for the result of this task.', + description="Inserts a system (or developer) message as the first item in the model's context.\n\nWhen using along with `previous_response_id`, the instructions from a previous\nresponse will not be carried over to the next response. This makes it simple\nto swap out system (or developer) messages in new responses.\n", ) - external_task_id: Optional[str] = Field( + max_output_tokens: Optional[int] = Field( None, - description='Customized Task ID. 
Must be unique within a single user account.', + description='An upper bound for the number of tokens that can be generated for a response, including visible output tokens and [reasoning tokens](/docs/guides/reasoning).\n', + ) + model: Optional[OpenAIModels] = None + previous_response_id: Optional[str] = Field( + None, + description='The unique ID of the previous response to the model. Use this to\ncreate multi-turn conversations. Learn more about \n[conversation state](/docs/guides/conversation-state).\n', + ) + reasoning: Optional[Reasoning] = None + text: Optional[Text] = None + tool_choice: Optional[ + Union[ToolChoiceOptions, ToolChoiceTypes, ToolChoiceFunction] + ] = Field( + None, + description='How the model should select which tool (or tools) to use when generating\na response. See the `tools` parameter to see how to specify which tools\nthe model can call.\n', + ) + tools: Optional[List[Tool]] = None + truncation: Optional[Truncation1] = Field( + 'disabled', + description="The truncation strategy to use for the model response.\n- `auto`: If the context of this response and previous ones exceeds\n the model's context window size, the model will truncate the \n response to fit the context window by dropping input items in the\n middle of the conversation. 
\n- `disabled` (default): If a model response will exceed the context window \n size for a model, the request will fail with a 400 error.\n", ) -class StripeCharge(BaseModel): - id: Optional[str] = None - object: Optional[Object2] = None - amount: Optional[int] = None - amount_captured: Optional[int] = None - amount_refunded: Optional[int] = None - application: Optional[str] = None - application_fee: Optional[str] = None - application_fee_amount: Optional[int] = None - balance_transaction: Optional[str] = None - billing_details: Optional[StripeBillingDetails] = None - calculated_statement_descriptor: Optional[str] = None - captured: Optional[bool] = None - created: Optional[int] = None - currency: Optional[str] = None - customer: Optional[str] = None - description: Optional[str] = None - destination: Optional[Any] = None - dispute: Optional[Any] = None - disputed: Optional[bool] = None - failure_balance_transaction: Optional[Any] = None - failure_code: Optional[Any] = None - failure_message: Optional[Any] = None - fraud_details: Optional[Dict[str, Any]] = None - invoice: Optional[Any] = None - livemode: Optional[bool] = None - metadata: Optional[Dict[str, Any]] = None - on_behalf_of: Optional[Any] = None - order: Optional[Any] = None - outcome: Optional[StripeOutcome] = None - paid: Optional[bool] = None - payment_intent: Optional[str] = None - payment_method: Optional[str] = None - payment_method_details: Optional[StripePaymentMethodDetails] = None - radar_options: Optional[Dict[str, Any]] = None - receipt_email: Optional[str] = None - receipt_number: Optional[str] = None - receipt_url: Optional[str] = None - refunded: Optional[bool] = None - refunds: Optional[StripeRefundList] = None - review: Optional[Any] = None - shipping: Optional[StripeShipping] = None - source: Optional[Any] = None - source_transfer: Optional[Any] = None - statement_descriptor: Optional[Any] = None - statement_descriptor_suffix: Optional[Any] = None - status: Optional[str] = None - 
transfer_data: Optional[Any] = None - transfer_group: Optional[Any] = None - - -class StripeChargeList(BaseModel): - object: Optional[str] = None - data: Optional[List[StripeCharge]] = None - has_more: Optional[bool] = None - total_count: Optional[int] = None - url: Optional[str] = None +class GeminiCandidate(BaseModel): + citationMetadata: Optional[GeminiCitationMetadata] = None + content: Optional[GeminiContent] = None + finishReason: Optional[str] = None + safetyRatings: Optional[List[GeminiSafetyRating]] = None -class StripePaymentIntent(BaseModel): - id: Optional[str] = None - object: Optional[Object1] = None - amount: Optional[int] = None - amount_capturable: Optional[int] = None - amount_details: Optional[StripeAmountDetails] = None - amount_received: Optional[int] = None - application: Optional[str] = None - application_fee_amount: Optional[int] = None - automatic_payment_methods: Optional[Any] = None - canceled_at: Optional[int] = None - cancellation_reason: Optional[str] = None - capture_method: Optional[str] = None - charges: Optional[StripeChargeList] = None - client_secret: Optional[str] = None - confirmation_method: Optional[str] = None - created: Optional[int] = None - currency: Optional[str] = None - customer: Optional[str] = None - description: Optional[str] = None - invoice: Optional[str] = None - last_payment_error: Optional[Any] = None - latest_charge: Optional[str] = None - livemode: Optional[bool] = None - metadata: Optional[Dict[str, Any]] = None - next_action: Optional[Any] = None - on_behalf_of: Optional[Any] = None - payment_method: Optional[str] = None - payment_method_configuration_details: Optional[Any] = None - payment_method_options: Optional[StripePaymentMethodOptions] = None - payment_method_types: Optional[List[str]] = None - processing: Optional[Any] = None - receipt_email: Optional[str] = None - review: Optional[Any] = None - setup_future_usage: Optional[Any] = None - shipping: Optional[StripeShipping] = None - source: 
Optional[Any] = None - statement_descriptor: Optional[Any] = None - statement_descriptor_suffix: Optional[Any] = None - status: Optional[str] = None - transfer_data: Optional[Any] = None - transfer_group: Optional[Any] = None +class GeminiGenerateContentResponse(BaseModel): + candidates: Optional[List[GeminiCandidate]] = None + promptFeedback: Optional[GeminiPromptFeedback] = None -class Data8(BaseModel): - object: Optional[StripePaymentIntent] = None +class InputItem(RootModel[Union[EasyInputMessage, Item]]): + root: Union[EasyInputMessage, Item] -class StripeEvent(BaseModel): - id: str - object: Object - api_version: Optional[str] = None - created: Optional[int] = None - data: Data8 - livemode: Optional[bool] = None - pending_webhooks: Optional[int] = None - request: Optional[StripeRequestInfo] = None - type: Type +class OpenAICreateResponse(CreateModelResponseProperties, ResponseProperties): + include: Optional[List[Includable]] = Field( + None, + description='Specify additional output data to include in the model response. 
Currently\nsupported values are:\n- `file_search_call.results`: Include the search results of\n the file search tool call.\n- `message.input_image.image_url`: Include image urls from the input message.\n- `computer_call_output.output.image_url`: Include image urls from the computer call output.\n', + ) + input: Union[str, List[InputItem]] = Field( + ..., + description='Text, image, or file inputs to the model, used to generate a response.\n\nLearn more:\n- [Text inputs and outputs](/docs/guides/text)\n- [Image inputs](/docs/guides/images)\n- [File inputs](/docs/guides/pdf-files)\n- [Conversation state](/docs/guides/conversation-state)\n- [Function calling](/docs/guides/function-calling)\n', + ) + parallel_tool_calls: Optional[bool] = Field( + True, description='Whether to allow the model to run tool calls in parallel.\n' + ) + store: Optional[bool] = Field( + True, + description='Whether to store the generated model response for later retrieval via\nAPI.\n', + ) + stream: Optional[bool] = Field( + False, + description='If set to true, the model response data will be streamed to the client\nas it is generated using [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).\nSee the [Streaming section below](/docs/api-reference/responses-streaming)\nfor more information.\n', + ) + usage: Optional[ResponseUsage] = None + + +class OpenAIResponse(ModelResponseProperties, ResponseProperties): + created_at: Optional[float] = Field( + None, + description='Unix timestamp (in seconds) of when this Response was created.', + ) + error: Optional[ResponseError] = None + id: Optional[str] = Field(None, description='Unique identifier for this Response.') + incomplete_details: Optional[IncompleteDetails] = Field( + None, description='Details about why the response is incomplete.\n' + ) + object: Optional[Object] = Field( + None, description='The object type of this resource - always set to `response`.' 
+ ) + output: Optional[List[OutputItem]] = Field( + None, + description="An array of content items generated by the model.\n\n- The length and order of items in the `output` array is dependent\n on the model's response.\n- Rather than accessing the first item in the `output` array and \n assuming it's an `assistant` message with the content generated by\n the model, you might consider using the `output_text` property where\n supported in SDKs.\n", + ) + output_text: Optional[str] = Field( + None, + description='SDK-only convenience property that contains the aggregated text output \nfrom all `output_text` items in the `output` array, if any are present. \nSupported in the Python and JavaScript SDKs.\n', + ) + parallel_tool_calls: Optional[bool] = Field( + True, description='Whether to allow the model to run tool calls in parallel.\n' + ) + status: Optional[Status6] = Field( + None, + description='The status of the response generation. One of `completed`, `failed`, `in_progress`, or `incomplete`.', + ) + usage: Optional[ResponseUsage] = None diff --git a/comfy_api_nodes/apis/client.py b/comfy_api_nodes/apis/client.py index 62866216fff..0897d5d7861 100644 --- a/comfy_api_nodes/apis/client.py +++ b/comfy_api_nodes/apis/client.py @@ -139,7 +139,7 @@ class EmptyRequest(BaseModel): class UploadRequest(BaseModel): file_name: str = Field(..., description="Filename to upload") - content_type: str | None = Field( + content_type: Optional[str] = Field( None, description="Mime type of the file. 
For example: image/png, image/jpeg, video/mp4, etc.", ) diff --git a/comfy_api_nodes/apis/rodin_api.py b/comfy_api_nodes/apis/rodin_api.py new file mode 100644 index 00000000000..b0cf171fa63 --- /dev/null +++ b/comfy_api_nodes/apis/rodin_api.py @@ -0,0 +1,57 @@ +from __future__ import annotations + +from enum import Enum +from typing import Optional, List +from pydantic import BaseModel, Field + + +class Rodin3DGenerateRequest(BaseModel): + seed: int = Field(..., description="seed_") + tier: str = Field(..., description="Tier of generation.") + material: str = Field(..., description="The material type.") + quality: str = Field(..., description="The generation quality of the mesh.") + mesh_mode: str = Field(..., description="It controls the type of faces of generated models.") + +class GenerateJobsData(BaseModel): + uuids: List[str] = Field(..., description="str LIST") + subscription_key: str = Field(..., description="subscription key") + +class Rodin3DGenerateResponse(BaseModel): + message: Optional[str] = Field(None, description="Return message.") + prompt: Optional[str] = Field(None, description="Generated Prompt from image.") + submit_time: Optional[str] = Field(None, description="Submit Time") + uuid: Optional[str] = Field(None, description="Task str") + jobs: Optional[GenerateJobsData] = Field(None, description="Details of jobs") + +class JobStatus(str, Enum): + """ + Status for jobs + """ + Done = "Done" + Failed = "Failed" + Generating = "Generating" + Waiting = "Waiting" + +class Rodin3DCheckStatusRequest(BaseModel): + subscription_key: str = Field(..., description="subscription from generate endpoint") + +class JobItem(BaseModel): + uuid: str = Field(..., description="uuid") + status: JobStatus = Field(...,description="Status Currently") + +class Rodin3DCheckStatusResponse(BaseModel): + jobs: List[JobItem] = Field(..., description="Job status List") + +class Rodin3DDownloadRequest(BaseModel): + task_uuid: str = Field(..., description="Task str") + +class 
RodinResourceItem(BaseModel): + url: str = Field(..., description="Download Url") + name: str = Field(..., description="File name with ext") + +class Rodin3DDownloadResponse(BaseModel): + list: List[RodinResourceItem] = Field(..., description="Source List") + + + + diff --git a/comfy_api_nodes/apis/tripo_api.py b/comfy_api_nodes/apis/tripo_api.py new file mode 100644 index 00000000000..626e8d277da --- /dev/null +++ b/comfy_api_nodes/apis/tripo_api.py @@ -0,0 +1,275 @@ +from __future__ import annotations +from comfy_api_nodes.apis import ( + TripoModelVersion, + TripoTextureQuality, +) +from enum import Enum +from typing import Optional, List, Dict, Any, Union + +from pydantic import BaseModel, Field, RootModel + +class TripoStyle(str, Enum): + PERSON_TO_CARTOON = "person:person2cartoon" + ANIMAL_VENOM = "animal:venom" + OBJECT_CLAY = "object:clay" + OBJECT_STEAMPUNK = "object:steampunk" + OBJECT_CHRISTMAS = "object:christmas" + OBJECT_BARBIE = "object:barbie" + GOLD = "gold" + ANCIENT_BRONZE = "ancient_bronze" + NONE = "None" + +class TripoTaskType(str, Enum): + TEXT_TO_MODEL = "text_to_model" + IMAGE_TO_MODEL = "image_to_model" + MULTIVIEW_TO_MODEL = "multiview_to_model" + TEXTURE_MODEL = "texture_model" + REFINE_MODEL = "refine_model" + ANIMATE_PRERIGCHECK = "animate_prerigcheck" + ANIMATE_RIG = "animate_rig" + ANIMATE_RETARGET = "animate_retarget" + STYLIZE_MODEL = "stylize_model" + CONVERT_MODEL = "convert_model" + +class TripoTextureAlignment(str, Enum): + ORIGINAL_IMAGE = "original_image" + GEOMETRY = "geometry" + +class TripoOrientation(str, Enum): + ALIGN_IMAGE = "align_image" + DEFAULT = "default" + +class TripoOutFormat(str, Enum): + GLB = "glb" + FBX = "fbx" + +class TripoTopology(str, Enum): + BIP = "bip" + QUAD = "quad" + +class TripoSpec(str, Enum): + MIXAMO = "mixamo" + TRIPO = "tripo" + +class TripoAnimation(str, Enum): + IDLE = "preset:idle" + WALK = "preset:walk" + CLIMB = "preset:climb" + JUMP = "preset:jump" + RUN = "preset:run" + SLASH = 
"preset:slash" + SHOOT = "preset:shoot" + HURT = "preset:hurt" + FALL = "preset:fall" + TURN = "preset:turn" + +class TripoStylizeStyle(str, Enum): + LEGO = "lego" + VOXEL = "voxel" + VORONOI = "voronoi" + MINECRAFT = "minecraft" + +class TripoConvertFormat(str, Enum): + GLTF = "GLTF" + USDZ = "USDZ" + FBX = "FBX" + OBJ = "OBJ" + STL = "STL" + _3MF = "3MF" + +class TripoTextureFormat(str, Enum): + BMP = "BMP" + DPX = "DPX" + HDR = "HDR" + JPEG = "JPEG" + OPEN_EXR = "OPEN_EXR" + PNG = "PNG" + TARGA = "TARGA" + TIFF = "TIFF" + WEBP = "WEBP" + +class TripoTaskStatus(str, Enum): + QUEUED = "queued" + RUNNING = "running" + SUCCESS = "success" + FAILED = "failed" + CANCELLED = "cancelled" + UNKNOWN = "unknown" + BANNED = "banned" + EXPIRED = "expired" + +class TripoFileTokenReference(BaseModel): + type: Optional[str] = Field(None, description='The type of the reference') + file_token: str + +class TripoUrlReference(BaseModel): + type: Optional[str] = Field(None, description='The type of the reference') + url: str + +class TripoObjectStorage(BaseModel): + bucket: str + key: str + +class TripoObjectReference(BaseModel): + type: str + object: TripoObjectStorage + +class TripoFileEmptyReference(BaseModel): + pass + +class TripoFileReference(RootModel): + root: Union[TripoFileTokenReference, TripoUrlReference, TripoObjectReference, TripoFileEmptyReference] + +class TripoGetStsTokenRequest(BaseModel): + format: str = Field(..., description='The format of the image') + +class TripoTextToModelRequest(BaseModel): + type: TripoTaskType = Field(TripoTaskType.TEXT_TO_MODEL, description='Type of task') + prompt: str = Field(..., description='The text prompt describing the model to generate', max_length=1024) + negative_prompt: Optional[str] = Field(None, description='The negative text prompt', max_length=1024) + model_version: Optional[TripoModelVersion] = TripoModelVersion.V2_5 + face_limit: Optional[int] = Field(None, description='The number of faces to limit the generation to') + 
texture: Optional[bool] = Field(True, description='Whether to apply texture to the generated model') + pbr: Optional[bool] = Field(True, description='Whether to apply PBR to the generated model') + image_seed: Optional[int] = Field(None, description='The seed for the text') + model_seed: Optional[int] = Field(None, description='The seed for the model') + texture_seed: Optional[int] = Field(None, description='The seed for the texture') + texture_quality: Optional[TripoTextureQuality] = TripoTextureQuality.standard + style: Optional[TripoStyle] = None + auto_size: Optional[bool] = Field(False, description='Whether to auto-size the model') + quad: Optional[bool] = Field(False, description='Whether to apply quad to the generated model') + +class TripoImageToModelRequest(BaseModel): + type: TripoTaskType = Field(TripoTaskType.IMAGE_TO_MODEL, description='Type of task') + file: TripoFileReference = Field(..., description='The file reference to convert to a model') + model_version: Optional[TripoModelVersion] = Field(None, description='The model version to use for generation') + face_limit: Optional[int] = Field(None, description='The number of faces to limit the generation to') + texture: Optional[bool] = Field(True, description='Whether to apply texture to the generated model') + pbr: Optional[bool] = Field(True, description='Whether to apply PBR to the generated model') + model_seed: Optional[int] = Field(None, description='The seed for the model') + texture_seed: Optional[int] = Field(None, description='The seed for the texture') + texture_quality: Optional[TripoTextureQuality] = TripoTextureQuality.standard + texture_alignment: Optional[TripoTextureAlignment] = Field(TripoTextureAlignment.ORIGINAL_IMAGE, description='The texture alignment method') + style: Optional[TripoStyle] = Field(None, description='The style to apply to the generated model') + auto_size: Optional[bool] = Field(False, description='Whether to auto-size the model') + orientation: 
Optional[TripoOrientation] = TripoOrientation.DEFAULT + quad: Optional[bool] = Field(False, description='Whether to apply quad to the generated model') + +class TripoMultiviewToModelRequest(BaseModel): + type: TripoTaskType = TripoTaskType.MULTIVIEW_TO_MODEL + files: List[TripoFileReference] = Field(..., description='The file references to convert to a model') + model_version: Optional[TripoModelVersion] = Field(None, description='The model version to use for generation') + orthographic_projection: Optional[bool] = Field(False, description='Whether to use orthographic projection') + face_limit: Optional[int] = Field(None, description='The number of faces to limit the generation to') + texture: Optional[bool] = Field(True, description='Whether to apply texture to the generated model') + pbr: Optional[bool] = Field(True, description='Whether to apply PBR to the generated model') + model_seed: Optional[int] = Field(None, description='The seed for the model') + texture_seed: Optional[int] = Field(None, description='The seed for the texture') + texture_quality: Optional[TripoTextureQuality] = TripoTextureQuality.standard + texture_alignment: Optional[TripoTextureAlignment] = TripoTextureAlignment.ORIGINAL_IMAGE + auto_size: Optional[bool] = Field(False, description='Whether to auto-size the model') + orientation: Optional[TripoOrientation] = Field(TripoOrientation.DEFAULT, description='The orientation for the model') + quad: Optional[bool] = Field(False, description='Whether to apply quad to the generated model') + +class TripoTextureModelRequest(BaseModel): + type: TripoTaskType = Field(TripoTaskType.TEXTURE_MODEL, description='Type of task') + original_model_task_id: str = Field(..., description='The task ID of the original model') + texture: Optional[bool] = Field(True, description='Whether to apply texture to the model') + pbr: Optional[bool] = Field(True, description='Whether to apply PBR to the model') + model_seed: Optional[int] = Field(None, description='The 
seed for the model') + texture_seed: Optional[int] = Field(None, description='The seed for the texture') + texture_quality: Optional[TripoTextureQuality] = Field(None, description='The quality of the texture') + texture_alignment: Optional[TripoTextureAlignment] = Field(TripoTextureAlignment.ORIGINAL_IMAGE, description='The texture alignment method') + +class TripoRefineModelRequest(BaseModel): + type: TripoTaskType = Field(TripoTaskType.REFINE_MODEL, description='Type of task') + draft_model_task_id: str = Field(..., description='The task ID of the draft model') + +class TripoAnimatePrerigcheckRequest(BaseModel): + type: TripoTaskType = Field(TripoTaskType.ANIMATE_PRERIGCHECK, description='Type of task') + original_model_task_id: str = Field(..., description='The task ID of the original model') + +class TripoAnimateRigRequest(BaseModel): + type: TripoTaskType = Field(TripoTaskType.ANIMATE_RIG, description='Type of task') + original_model_task_id: str = Field(..., description='The task ID of the original model') + out_format: Optional[TripoOutFormat] = Field(TripoOutFormat.GLB, description='The output format') + spec: Optional[TripoSpec] = Field(TripoSpec.TRIPO, description='The specification for rigging') + +class TripoAnimateRetargetRequest(BaseModel): + type: TripoTaskType = Field(TripoTaskType.ANIMATE_RETARGET, description='Type of task') + original_model_task_id: str = Field(..., description='The task ID of the original model') + animation: TripoAnimation = Field(..., description='The animation to apply') + out_format: Optional[TripoOutFormat] = Field(TripoOutFormat.GLB, description='The output format') + bake_animation: Optional[bool] = Field(True, description='Whether to bake the animation') + +class TripoStylizeModelRequest(BaseModel): + type: TripoTaskType = Field(TripoTaskType.STYLIZE_MODEL, description='Type of task') + style: TripoStylizeStyle = Field(..., description='The style to apply to the model') + original_model_task_id: str = Field(..., 
description='The task ID of the original model') + block_size: Optional[int] = Field(80, description='The block size for stylization') + +class TripoConvertModelRequest(BaseModel): + type: TripoTaskType = Field(TripoTaskType.CONVERT_MODEL, description='Type of task') + format: TripoConvertFormat = Field(..., description='The format to convert to') + original_model_task_id: str = Field(..., description='The task ID of the original model') + quad: Optional[bool] = Field(False, description='Whether to apply quad to the model') + force_symmetry: Optional[bool] = Field(False, description='Whether to force symmetry') + face_limit: Optional[int] = Field(10000, description='The number of faces to limit the conversion to') + flatten_bottom: Optional[bool] = Field(False, description='Whether to flatten the bottom of the model') + flatten_bottom_threshold: Optional[float] = Field(0.01, description='The threshold for flattening the bottom') + texture_size: Optional[int] = Field(4096, description='The size of the texture') + texture_format: Optional[TripoTextureFormat] = Field(TripoTextureFormat.JPEG, description='The format of the texture') + pivot_to_center_bottom: Optional[bool] = Field(False, description='Whether to pivot to the center bottom') + +class TripoTaskRequest(RootModel): + root: Union[ + TripoTextToModelRequest, + TripoImageToModelRequest, + TripoMultiviewToModelRequest, + TripoTextureModelRequest, + TripoRefineModelRequest, + TripoAnimatePrerigcheckRequest, + TripoAnimateRigRequest, + TripoAnimateRetargetRequest, + TripoStylizeModelRequest, + TripoConvertModelRequest + ] + +class TripoTaskOutput(BaseModel): + model: Optional[str] = Field(None, description='URL to the model') + base_model: Optional[str] = Field(None, description='URL to the base model') + pbr_model: Optional[str] = Field(None, description='URL to the PBR model') + rendered_image: Optional[str] = Field(None, description='URL to the rendered image') + riggable: Optional[bool] = Field(None, 
description='Whether the model is riggable') + +class TripoTask(BaseModel): + task_id: str = Field(..., description='The task ID') + type: Optional[str] = Field(None, description='The type of task') + status: Optional[TripoTaskStatus] = Field(None, description='The status of the task') + input: Optional[Dict[str, Any]] = Field(None, description='The input parameters for the task') + output: Optional[TripoTaskOutput] = Field(None, description='The output of the task') + progress: Optional[int] = Field(None, description='The progress of the task', ge=0, le=100) + create_time: Optional[int] = Field(None, description='The creation time of the task') + running_left_time: Optional[int] = Field(None, description='The estimated time left for the task') + queue_position: Optional[int] = Field(None, description='The position in the queue') + +class TripoTaskResponse(BaseModel): + code: int = Field(0, description='The response code') + data: TripoTask = Field(..., description='The task data') + +class TripoGeneralResponse(BaseModel): + code: int = Field(0, description='The response code') + data: Dict[str, str] = Field(..., description='The task ID data') + +class TripoBalanceData(BaseModel): + balance: float = Field(..., description='The account balance') + frozen: float = Field(..., description='The frozen balance') + +class TripoBalanceResponse(BaseModel): + code: int = Field(0, description='The response code') + data: TripoBalanceData = Field(..., description='The balance data') + +class TripoErrorResponse(BaseModel): + code: int = Field(..., description='The error code') + message: str = Field(..., description='The error message') + suggestion: str = Field(..., description='The suggestion for fixing the error') diff --git a/comfy_api_nodes/nodes_gemini.py b/comfy_api_nodes/nodes_gemini.py new file mode 100644 index 00000000000..ae7b048462a --- /dev/null +++ b/comfy_api_nodes/nodes_gemini.py @@ -0,0 +1,446 @@ +""" +API Nodes for Gemini Multimodal LLM Usage via Remote API 
"""
API Nodes for Gemini Multimodal LLM Usage via Remote API
See: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference
"""

import base64
import os
from enum import Enum
from typing import Literal, Optional, Union

import torch

import folder_paths
from comfy.comfy_types.node_typing import IO, ComfyNodeABC, InputTypeDict
from server import PromptServer
from comfy_api_nodes.apis import (
    GeminiContent,
    GeminiGenerateContentRequest,
    GeminiGenerateContentResponse,
    GeminiInlineData,
    GeminiPart,
    GeminiMimeType,
)
from comfy_api_nodes.apis.client import (
    ApiEndpoint,
    HttpMethod,
    SynchronousOperation,
)
from comfy_api_nodes.apinode_utils import (
    validate_string,
    audio_to_base64_string,
    video_to_base64_string,
    tensor_to_base64_string,
)


GEMINI_BASE_ENDPOINT = "/proxy/vertexai/gemini"
GEMINI_MAX_INPUT_FILE_SIZE = 20 * 1024 * 1024  # 20 MB

# File extensions offered by the GeminiInputFiles node (matched case-sensitively).
_GEMINI_INPUT_FILE_SUFFIXES = (".txt", ".pdf")


class GeminiModel(str, Enum):
    """
    Gemini Model Names allowed by comfy-api
    """

    gemini_2_5_pro_preview_05_06 = "gemini-2.5-pro-preview-05-06"
    gemini_2_5_flash_preview_04_17 = "gemini-2.5-flash-preview-04-17"


def get_gemini_endpoint(
    model: GeminiModel,
) -> ApiEndpoint[GeminiGenerateContentRequest, GeminiGenerateContentResponse]:
    """
    Get the API endpoint for a given Gemini model.

    Args:
        model: The Gemini model to use, either as enum or string value.

    Returns:
        ApiEndpoint configured for the specific Gemini model.

    Raises:
        ValueError: If ``model`` is a string that is not a ``GeminiModel`` value.
    """
    # GeminiModel subclasses str, so this also (harmlessly) re-wraps enum members.
    if isinstance(model, str):
        model = GeminiModel(model)
    return ApiEndpoint(
        path=f"{GEMINI_BASE_ENDPOINT}/{model.value}",
        method=HttpMethod.POST,
        request_model=GeminiGenerateContentRequest,
        response_model=GeminiGenerateContentResponse,
    )


class GeminiNode(ComfyNodeABC):
    """
    Node to generate text responses from a Gemini model.

    This node allows users to interact with Google's Gemini AI models, providing
    multimodal inputs (text, images, audio, video, files) to generate coherent
    text responses. The node works with the latest Gemini models, handling the
    API communication and response parsing.
    """

    @classmethod
    def INPUT_TYPES(cls) -> InputTypeDict:
        return {
            "required": {
                "prompt": (
                    IO.STRING,
                    {
                        "multiline": True,
                        "default": "",
                        "tooltip": "Text inputs to the model, used to generate a response. You can include detailed instructions, questions, or context for the model.",
                    },
                ),
                "model": (
                    IO.COMBO,
                    {
                        "tooltip": "The Gemini model to use for generating responses.",
                        "options": [model.value for model in GeminiModel],
                        "default": GeminiModel.gemini_2_5_pro_preview_05_06.value,
                    },
                ),
                "seed": (
                    IO.INT,
                    {
                        "default": 42,
                        "min": 0,
                        "max": 0xFFFFFFFFFFFFFFFF,
                        "control_after_generate": True,
                        "tooltip": "When seed is fixed to a specific value, the model makes a best effort to provide the same response for repeated requests. Deterministic output isn't guaranteed. Also, changing the model or parameter settings, such as the temperature, can cause variations in the response even when you use the same seed value. By default, a random seed value is used.",
                    },
                ),
            },
            "optional": {
                "images": (
                    IO.IMAGE,
                    {
                        "default": None,
                        "tooltip": "Optional image(s) to use as context for the model. To include multiple images, you can use the Batch Images node.",
                    },
                ),
                "audio": (
                    IO.AUDIO,
                    {
                        "tooltip": "Optional audio to use as context for the model.",
                        "default": None,
                    },
                ),
                "video": (
                    IO.VIDEO,
                    {
                        "tooltip": "Optional video to use as context for the model.",
                        "default": None,
                    },
                ),
                "files": (
                    "GEMINI_INPUT_FILES",
                    {
                        "default": None,
                        "tooltip": "Optional file(s) to use as context for the model. Accepts inputs from the Gemini Generate Content Input Files node.",
                    },
                ),
            },
            "hidden": {
                "auth_token": "AUTH_TOKEN_COMFY_ORG",
                "comfy_api_key": "API_KEY_COMFY_ORG",
                "unique_id": "UNIQUE_ID",
            },
        }

    DESCRIPTION = "Generate text responses with Google's Gemini AI model. You can provide multiple types of inputs (text, images, audio, video) as context for generating more relevant and meaningful responses."
    RETURN_TYPES = ("STRING",)
    FUNCTION = "api_call"
    CATEGORY = "api node/text/Gemini"
    API_NODE = True

    def get_parts_from_response(
        self, response: GeminiGenerateContentResponse
    ) -> list[GeminiPart]:
        """
        Extract all parts from the Gemini API response.

        Args:
            response: The API response from Gemini.

        Returns:
            List of response parts from the first candidate, or an empty list
            when the response has no candidates, no content, or no parts.
        """
        # Guard each level so an empty response reaches api_call's
        # "Empty response" fallback instead of raising IndexError/TypeError.
        candidates = response.candidates
        if not candidates:
            return []
        content = candidates[0].content
        if content is None or not content.parts:
            return []
        return content.parts

    def get_parts_by_type(
        self,
        response: GeminiGenerateContentResponse,
        part_type: Union[Literal["text"], str],
    ) -> list[GeminiPart]:
        """
        Filter response parts by their type.

        Args:
            response: The API response from Gemini.
            part_type: Type of parts to extract ("text" or a MIME type).

        Returns:
            List of response parts matching the requested type.
        """
        # NOTE: ``Union[...]`` is used instead of ``X | Y`` because this
        # annotation is evaluated at definition time, where ``|`` on typing
        # forms requires Python 3.10+.
        parts = []
        for part in self.get_parts_from_response(response):
            if part_type == "text" and hasattr(part, "text") and part.text:
                parts.append(part)
            elif (
                hasattr(part, "inlineData")
                and part.inlineData
                and part.inlineData.mimeType == part_type
            ):
                parts.append(part)
            # Skip parts that don't match the requested type
        return parts

    def get_text_from_response(self, response: GeminiGenerateContentResponse) -> str:
        """
        Extract and concatenate all text parts from the response.

        Args:
            response: The API response from Gemini.

        Returns:
            Combined text from all text parts in the response, joined with
            newlines (empty string when there are no text parts).
        """
        parts = self.get_parts_by_type(response, "text")
        return "\n".join([part.text for part in parts])

    def create_video_parts(self, video_input: IO.VIDEO, **kwargs) -> list[GeminiPart]:
        """
        Convert video input to Gemini API compatible parts.

        The video is encoded as MP4/H.264 and embedded as base64 inline data.

        Args:
            video_input: Video input from ComfyUI.
            **kwargs: Accepted for call compatibility; currently unused (not
                forwarded to the conversion function).

        Returns:
            List with a single GeminiPart containing the encoded video.
        """
        # Imported lazily, as in the original implementation.
        from comfy_api.util import VideoContainer, VideoCodec

        base_64_string = video_to_base64_string(
            video_input,
            container_format=VideoContainer.MP4,
            codec=VideoCodec.H264,
        )
        return [
            GeminiPart(
                inlineData=GeminiInlineData(
                    mimeType=GeminiMimeType.video_mp4,
                    data=base_64_string,
                )
            )
        ]

    def create_audio_parts(self, audio_input: IO.AUDIO) -> list[GeminiPart]:
        """
        Convert audio input to Gemini API compatible parts.

        Args:
            audio_input: Audio input from ComfyUI, containing waveform tensor and sample rate.

        Returns:
            List of GeminiPart objects containing the encoded audio, one per
            entry along the first dimension of the waveform.
        """
        audio_parts: list[GeminiPart] = []
        for batch_index in range(audio_input["waveform"].shape[0]):
            # Recreate an IO.AUDIO object for the given batch dimension index
            audio_at_index = {
                "waveform": audio_input["waveform"][batch_index].unsqueeze(0),
                "sample_rate": audio_input["sample_rate"],
            }
            # Convert to MP3 format for compatibility with Gemini API
            audio_bytes = audio_to_base64_string(
                audio_at_index,
                container_format="mp3",
                codec_name="libmp3lame",
            )
            audio_parts.append(
                GeminiPart(
                    inlineData=GeminiInlineData(
                        mimeType=GeminiMimeType.audio_mp3,
                        data=audio_bytes,
                    )
                )
            )
        return audio_parts

    def create_image_parts(self, image_input: torch.Tensor) -> list[GeminiPart]:
        """
        Convert image tensor input to Gemini API compatible parts.

        Args:
            image_input: Batch of image tensors from ComfyUI.

        Returns:
            List of GeminiPart objects containing the encoded images, one per
            entry along the first dimension of the tensor.
        """
        image_parts: list[GeminiPart] = []
        for image_index in range(image_input.shape[0]):
            # Keep a leading batch dimension of 1 for the encoder helper.
            image_as_b64 = tensor_to_base64_string(
                image_input[image_index].unsqueeze(0)
            )
            image_parts.append(
                GeminiPart(
                    inlineData=GeminiInlineData(
                        mimeType=GeminiMimeType.image_png,
                        data=image_as_b64,
                    )
                )
            )
        return image_parts

    def create_text_part(self, text: str) -> GeminiPart:
        """
        Create a text part for the Gemini API request.

        Args:
            text: The text content to include in the request.

        Returns:
            A GeminiPart object with the text content.
        """
        return GeminiPart(text=text)

    def api_call(
        self,
        prompt: str,
        model: GeminiModel,
        images: Optional[IO.IMAGE] = None,
        audio: Optional[IO.AUDIO] = None,
        video: Optional[IO.VIDEO] = None,
        files: Optional[list[GeminiPart]] = None,
        unique_id: Optional[str] = None,
        **kwargs,
    ) -> tuple[str]:
        """
        Send the prompt and any optional media to Gemini and return its text.

        Args:
            prompt: Text prompt; always sent as the first part of the message.
            model: Gemini model (enum member or its string value).
            images: Optional image batch, one PNG part per image.
            audio: Optional audio input, one MP3 part per batch entry.
            video: Optional video input, sent as one MP4 part.
            files: Optional pre-built parts from GeminiInputFiles.
            unique_id: Node id used to push the response text to the UI.
            **kwargs: Remaining inputs, forwarded as ``auth_kwargs``.

        Returns:
            One-element tuple with the response text, or a placeholder message
            when the model returned no text.
        """
        # NOTE(review): ``seed`` is declared in INPUT_TYPES but is not a named
        # parameter here, so if it is passed it lands in **kwargs and is
        # forwarded inside auth_kwargs rather than in the request - confirm
        # this is intended.

        # Validate inputs
        validate_string(prompt, strip_whitespace=False)

        # Create parts list with text prompt as the first part
        parts: list[GeminiPart] = [self.create_text_part(prompt)]

        # Add other modal parts
        if images is not None:
            image_parts = self.create_image_parts(images)
            parts.extend(image_parts)
        if audio is not None:
            parts.extend(self.create_audio_parts(audio))
        if video is not None:
            parts.extend(self.create_video_parts(video))
        if files is not None:
            parts.extend(files)

        # Create response
        response = SynchronousOperation(
            endpoint=get_gemini_endpoint(model),
            request=GeminiGenerateContentRequest(
                contents=[
                    GeminiContent(
                        role="user",
                        parts=parts,
                    )
                ]
            ),
            auth_kwargs=kwargs,
        ).execute()

        # Get result output
        output_text = self.get_text_from_response(response)
        if unique_id and output_text:
            PromptServer.instance.send_progress_text(output_text, node_id=unique_id)

        return (output_text or "Empty response from Gemini model...",)


class GeminiInputFiles(ComfyNodeABC):
    """
    Loads and formats input files for use with the Gemini API.

    This node allows users to include text (.txt) and PDF (.pdf) files as input
    context for the Gemini model. Files are converted to the appropriate format
    required by the API and can be chained together to include multiple files
    in a single request.
    """

    @classmethod
    def INPUT_TYPES(cls) -> InputTypeDict:
        """
        For details about the supported file input types, see:
        https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference
        """
        input_dir = folder_paths.get_input_directory()
        # Close the directory iterator deterministically; offer only .txt/.pdf
        # files below the size limit, sorted by name.
        with os.scandir(input_dir) as entries:
            input_files = sorted(
                entry.name
                for entry in entries
                if entry.is_file()
                and entry.name.endswith(_GEMINI_INPUT_FILE_SUFFIXES)
                and entry.stat().st_size < GEMINI_MAX_INPUT_FILE_SIZE
            )
        return {
            "required": {
                "file": (
                    IO.COMBO,
                    {
                        "tooltip": "Input files to include as context for the model. Only accepts text (.txt) and PDF (.pdf) files for now.",
                        "options": input_files,
                        "default": input_files[0] if input_files else None,
                    },
                ),
            },
            "optional": {
                "GEMINI_INPUT_FILES": (
                    "GEMINI_INPUT_FILES",
                    {
                        "tooltip": "An optional additional file(s) to batch together with the file loaded from this node. Allows chaining of input files so that a single message can include multiple input files.",
                        "default": None,
                    },
                ),
            },
        }

    DESCRIPTION = "Loads and prepares input files to include as inputs for Gemini LLM nodes. The files will be read by the Gemini model when generating a response. The contents of the text file count toward the token limit. 🛈 TIP: Can be chained together with other Gemini Input File nodes."
    RETURN_TYPES = ("GEMINI_INPUT_FILES",)
    FUNCTION = "prepare_files"
    CATEGORY = "api node/text/Gemini"

    def create_file_part(self, file_path: str) -> GeminiPart:
        """
        Read a file and wrap its base64-encoded bytes in a GeminiPart.

        Args:
            file_path: Path of the file to read.

        Returns:
            A GeminiPart whose inline data is tagged as PDF when the path ends
            with ".pdf" and as plain text otherwise.
        """
        mime_type = (
            GeminiMimeType.pdf
            if file_path.endswith(".pdf")
            else GeminiMimeType.text_plain
        )
        # Use base64 string directly, not the data URI
        with open(file_path, "rb") as f:
            file_content = f.read()
        base64_str = base64.b64encode(file_content).decode("utf-8")

        return GeminiPart(
            inlineData=GeminiInlineData(
                mimeType=mime_type,
                data=base64_str,
            )
        )

    def prepare_files(
        self, file: str, GEMINI_INPUT_FILES: Optional[list[GeminiPart]] = None
    ) -> tuple[list[GeminiPart]]:
        """
        Loads and formats input files for Gemini API.

        Args:
            file: Name of the selected input file.
            GEMINI_INPUT_FILES: Parts produced by an upstream node of this
                type; ``None`` (or omitted) means there are none.

        Returns:
            One-element tuple holding a new list: this node's file part
            followed by any upstream parts.
        """
        file_path = folder_paths.get_annotated_filepath(file)
        input_file_content = self.create_file_part(file_path)
        # Build a fresh list so the caller's list is never mutated, and accept
        # None because the optional input is declared with "default": None.
        files = [input_file_content] + list(GEMINI_INPUT_FILES or [])
        return (files,)


NODE_CLASS_MAPPINGS = {
    "GeminiNode": GeminiNode,
    "GeminiInputFiles": GeminiInputFiles,
}

NODE_DISPLAY_NAME_MAPPINGS = {
    "GeminiNode": "Google Gemini",
    "GeminiInputFiles": "Gemini Input Files",
}
InputMessageContentList, + InputContent, + InputFileContent, ) from comfy_api_nodes.apis.client import ( ApiEndpoint, HttpMethod, SynchronousOperation, + PollingOperation, + EmptyRequest, ) from comfy_api_nodes.apinode_utils import ( downscale_image_tensor, validate_and_cast_response, validate_string, + tensor_to_base64_string, + text_filepath_to_data_uri, ) +from comfy_api_nodes.mapper_utils import model_field_to_node_input + + +RESPONSES_ENDPOINT = "/proxy/openai/v1/responses" +STARTING_POINT_ID_PATTERN = r"" + + +class HistoryEntry(TypedDict): + """Type definition for a single history entry in the chat.""" + + prompt: str + response: str + response_id: str + timestamp: float + + +class ChatHistory(TypedDict): + """Type definition for the chat history dictionary.""" + + __annotations__: dict[str, list[HistoryEntry]] + + +class SupportedOpenAIModel(str, Enum): + o4_mini = "o4-mini" + o1 = "o1" + o3 = "o3" + o1_pro = "o1-pro" + gpt_4o = "gpt-4o" + gpt_4_1 = "gpt-4.1" + gpt_4_1_mini = "gpt-4.1-mini" + gpt_4_1_nano = "gpt-4.1-nano" + class OpenAIDalle2(ComfyNodeABC): """ @@ -115,7 +172,7 @@ def api_call( n=1, size="1024x1024", unique_id=None, - **kwargs + **kwargs, ): validate_string(prompt, strip_whitespace=False) model = "dall-e-2" @@ -262,7 +319,7 @@ def api_call( quality="standard", size="1024x1024", unique_id=None, - **kwargs + **kwargs, ): validate_string(prompt, strip_whitespace=False) model = "dall-e-3" @@ -400,12 +457,12 @@ def api_call( n=1, size="1024x1024", unique_id=None, - **kwargs + **kwargs, ): validate_string(prompt, strip_whitespace=False) model = "gpt-image-1" path = "/proxy/openai/images/generations" - content_type="application/json" + content_type = "application/json" request_class = OpenAIImageGenerationRequest img_binaries = [] mask_binary = None @@ -414,7 +471,7 @@ def api_call( if image is not None: path = "/proxy/openai/images/edits" request_class = OpenAIImageEditRequest - content_type ="multipart/form-data" + content_type = 
"multipart/form-data" batch_size = image.shape[0] @@ -486,17 +543,466 @@ def api_call( return (img_tensor,) -# A dictionary that contains all nodes you want to export with their names -# NOTE: names should be globally unique +class OpenAITextNode(ComfyNodeABC): + """ + Base class for OpenAI text generation nodes. + """ + + RETURN_TYPES = (IO.STRING,) + FUNCTION = "api_call" + CATEGORY = "api node/text/OpenAI" + API_NODE = True + + +class OpenAIChatNode(OpenAITextNode): + """ + Node to generate text responses from an OpenAI model. + """ + + def __init__(self) -> None: + """Initialize the chat node with a new session ID and empty history.""" + self.current_session_id: str = str(uuid.uuid4()) + self.history: dict[str, list[HistoryEntry]] = {} + self.previous_response_id: Optional[str] = None + + @classmethod + def INPUT_TYPES(cls) -> InputTypeDict: + return { + "required": { + "prompt": ( + IO.STRING, + { + "multiline": True, + "default": "", + "tooltip": "Text inputs to the model, used to generate a response.", + }, + ), + "persist_context": ( + IO.BOOLEAN, + { + "default": True, + "tooltip": "Persist chat context between calls (multi-turn conversation)", + }, + ), + "model": model_field_to_node_input( + IO.COMBO, + OpenAICreateResponse, + "model", + enum_type=SupportedOpenAIModel, + ), + }, + "optional": { + "images": ( + IO.IMAGE, + { + "default": None, + "tooltip": "Optional image(s) to use as context for the model. To include multiple images, you can use the Batch Images node.", + }, + ), + "files": ( + "OPENAI_INPUT_FILES", + { + "default": None, + "tooltip": "Optional file(s) to use as context for the model. Accepts inputs from the OpenAI Chat Input Files node.", + }, + ), + "advanced_options": ( + "OPENAI_CHAT_CONFIG", + { + "default": None, + "tooltip": "Optional configuration for the model. 
Accepts inputs from the OpenAI Chat Advanced Options node.", + }, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + "unique_id": "UNIQUE_ID", + }, + } + + DESCRIPTION = "Generate text responses from an OpenAI model." + + def get_result_response( + self, + response_id: str, + include: Optional[list[Includable]] = None, + auth_kwargs: Optional[dict[str, str]] = None, + ) -> OpenAIResponse: + """ + Retrieve a model response with the given ID from the OpenAI API. + + Args: + response_id (str): The ID of the response to retrieve. + include (Optional[List[Includable]]): Additional fields to include + in the response. See the `include` parameter for Response + creation above for more information. + + """ + return PollingOperation( + poll_endpoint=ApiEndpoint( + path=f"{RESPONSES_ENDPOINT}/{response_id}", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=OpenAIResponse, + query_params={"include": include}, + ), + completed_statuses=["completed"], + failed_statuses=["failed"], + status_extractor=lambda response: response.status, + auth_kwargs=auth_kwargs, + ).execute() + + def get_message_content_from_response( + self, response: OpenAIResponse + ) -> list[OutputContent]: + """Extract message content from the API response.""" + for output in response.output: + if output.root.type == "message": + return output.root.content + raise TypeError("No output message found in response") + + def get_text_from_message_content( + self, message_content: list[OutputContent] + ) -> str: + """Extract text content from message content.""" + for content_item in message_content: + if content_item.root.type == "output_text": + return str(content_item.root.text) + return "No text output found in response" + + def get_history_text(self, session_id: str) -> str: + """Convert the entire history for a given session to JSON string.""" + return json.dumps(self.history[session_id]) + + def display_history_on_node(self, 
session_id: str, node_id: str) -> None: + """Display formatted chat history on the node UI.""" + render_spec = { + "node_id": node_id, + "component": "ChatHistoryWidget", + "props": { + "history": self.get_history_text(session_id), + }, + } + PromptServer.instance.send_sync( + "display_component", + render_spec, + ) + + def add_to_history( + self, session_id: str, prompt: str, output_text: str, response_id: str + ) -> None: + """Add a new entry to the chat history.""" + if session_id not in self.history: + self.history[session_id] = [] + self.history[session_id].append( + { + "prompt": prompt, + "response": output_text, + "response_id": response_id, + "timestamp": time.time(), + } + ) + + def parse_output_text_from_response(self, response: OpenAIResponse) -> str: + """Extract text output from the API response.""" + message_contents = self.get_message_content_from_response(response) + return self.get_text_from_message_content(message_contents) + + def generate_new_session_id(self) -> str: + """Generate a new unique session ID.""" + return str(uuid.uuid4()) + + def get_session_id(self, persist_context: bool) -> str: + """Get the current or generate a new session ID based on context persistence.""" + return ( + self.current_session_id + if persist_context + else self.generate_new_session_id() + ) + + def tensor_to_input_image_content( + self, image: torch.Tensor, detail_level: Detail = "auto" + ) -> InputImageContent: + """Convert a tensor to an input image content object.""" + return InputImageContent( + detail=detail_level, + image_url=f"data:image/png;base64,{tensor_to_base64_string(image)}", + type="input_image", + ) + + def create_input_message_contents( + self, + prompt: str, + image: Optional[torch.Tensor] = None, + files: Optional[list[InputFileContent]] = None, + ) -> InputMessageContentList: + """Create a list of input message contents from prompt and optional image.""" + content_list: list[InputContent] = [ + InputTextContent(text=prompt, 
type="input_text"), + ] + if image is not None: + for i in range(image.shape[0]): + content_list.append( + self.tensor_to_input_image_content(image[i].unsqueeze(0)) + ) + if files is not None: + content_list.extend(files) + + return InputMessageContentList( + root=content_list, + ) + + def parse_response_id_from_prompt(self, prompt: str) -> Optional[str]: + """Extract response ID from prompt if it exists.""" + parsed_id = re.search(STARTING_POINT_ID_PATTERN, prompt) + return parsed_id.group(1) if parsed_id else None + + def strip_response_tag_from_prompt(self, prompt: str) -> str: + """Remove the response ID tag from the prompt.""" + return re.sub(STARTING_POINT_ID_PATTERN, "", prompt.strip()) + + def delete_history_after_response_id( + self, new_start_id: str, session_id: str + ) -> None: + """Delete history entries after a specific response ID.""" + if session_id not in self.history: + return + + new_history = [] + i = 0 + while ( + i < len(self.history[session_id]) + and self.history[session_id][i]["response_id"] != new_start_id + ): + new_history.append(self.history[session_id][i]) + i += 1 + + # Since it's the new starting point (not the response being edited), we include it as well + if i < len(self.history[session_id]): + new_history.append(self.history[session_id][i]) + + self.history[session_id] = new_history + + def api_call( + self, + prompt: str, + persist_context: bool, + model: SupportedOpenAIModel, + unique_id: Optional[str] = None, + images: Optional[torch.Tensor] = None, + files: Optional[list[InputFileContent]] = None, + advanced_options: Optional[CreateModelResponseProperties] = None, + **kwargs, + ) -> tuple[str]: + # Validate inputs + validate_string(prompt, strip_whitespace=False) + + session_id = self.get_session_id(persist_context) + response_id_override = self.parse_response_id_from_prompt(prompt) + if response_id_override: + is_starting_from_beginning = response_id_override == "start" + if is_starting_from_beginning: + 
self.history[session_id] = [] + previous_response_id = None + else: + previous_response_id = response_id_override + self.delete_history_after_response_id(response_id_override, session_id) + prompt = self.strip_response_tag_from_prompt(prompt) + elif persist_context: + previous_response_id = self.previous_response_id + else: + previous_response_id = None + + # Create response + create_response = SynchronousOperation( + endpoint=ApiEndpoint( + path=RESPONSES_ENDPOINT, + method=HttpMethod.POST, + request_model=OpenAICreateResponse, + response_model=OpenAIResponse, + ), + request=OpenAICreateResponse( + input=[ + Item( + root=InputMessage( + content=self.create_input_message_contents( + prompt, images, files + ), + role="user", + ) + ), + ], + store=True, + stream=False, + model=model, + previous_response_id=previous_response_id, + **( + advanced_options.model_dump(exclude_none=True) + if advanced_options + else {} + ), + ), + auth_kwargs=kwargs, + ).execute() + response_id = create_response.id + + # Get result output + result_response = self.get_result_response(response_id, auth_kwargs=kwargs) + output_text = self.parse_output_text_from_response(result_response) + + # Update history + self.add_to_history(session_id, prompt, output_text, response_id) + self.display_history_on_node(session_id, unique_id) + self.previous_response_id = response_id + + return (output_text,) + + +class OpenAIInputFiles(ComfyNodeABC): + """ + Loads and formats input files for OpenAI API. 
+ """ + + @classmethod + def INPUT_TYPES(cls) -> InputTypeDict: + """ + For details about the supported file input types, see: + https://platform.openai.com/docs/guides/pdf-files?api-mode=responses + """ + input_dir = folder_paths.get_input_directory() + input_files = [ + f + for f in os.scandir(input_dir) + if f.is_file() + and (f.name.endswith(".txt") or f.name.endswith(".pdf")) + and f.stat().st_size < 32 * 1024 * 1024 + ] + input_files = sorted(input_files, key=lambda x: x.name) + input_files = [f.name for f in input_files] + return { + "required": { + "file": ( + IO.COMBO, + { + "tooltip": "Input files to include as context for the model. Only accepts text (.txt) and PDF (.pdf) files for now.", + "options": input_files, + "default": input_files[0] if input_files else None, + }, + ), + }, + "optional": { + "OPENAI_INPUT_FILES": ( + "OPENAI_INPUT_FILES", + { + "tooltip": "An optional additional file(s) to batch together with the file loaded from this node. Allows chaining of input files so that a single message can include multiple input files.", + "default": None, + }, + ), + }, + } + + DESCRIPTION = "Loads and prepares input files (text, pdf, etc.) to include as inputs for the OpenAI Chat Node. The files will be read by the OpenAI model when generating a response. 🛈 TIP: Can be chained together with other OpenAI Input File nodes." + RETURN_TYPES = ("OPENAI_INPUT_FILES",) + FUNCTION = "prepare_files" + CATEGORY = "api node/text/OpenAI" + + def create_input_file_content(self, file_path: str) -> InputFileContent: + return InputFileContent( + file_data=text_filepath_to_data_uri(file_path), + filename=os.path.basename(file_path), + type="input_file", + ) + + def prepare_files( + self, file: str, OPENAI_INPUT_FILES: list[InputFileContent] = [] + ) -> tuple[list[InputFileContent]]: + """ + Loads and formats input files for OpenAI API. 
+ """ + file_path = folder_paths.get_annotated_filepath(file) + input_file_content = self.create_input_file_content(file_path) + files = [input_file_content] + OPENAI_INPUT_FILES + return (files,) + + +class OpenAIChatConfig(ComfyNodeABC): + """Allows setting additional configuration for the OpenAI Chat Node.""" + + RETURN_TYPES = ("OPENAI_CHAT_CONFIG",) + FUNCTION = "configure" + DESCRIPTION = ( + "Allows specifying advanced configuration options for the OpenAI Chat Nodes." + ) + CATEGORY = "api node/text/OpenAI" + + @classmethod + def INPUT_TYPES(cls) -> InputTypeDict: + return { + "required": { + "truncation": ( + IO.COMBO, + { + "options": ["auto", "disabled"], + "default": "auto", + "tooltip": "The truncation strategy to use for the model response. auto: If the context of this response and previous ones exceeds the model's context window size, the model will truncate the response to fit the context window by dropping input items in the middle of the conversation.disabled: If a model response will exceed the context window size for a model, the request will fail with a 400 error", + }, + ), + }, + "optional": { + "max_output_tokens": model_field_to_node_input( + IO.INT, + OpenAICreateResponse, + "max_output_tokens", + min=16, + default=4096, + max=16384, + tooltip="An upper bound for the number of tokens that can be generated for a response, including visible output tokens", + ), + "instructions": model_field_to_node_input( + IO.STRING, OpenAICreateResponse, "instructions", multiline=True + ), + }, + } + + def configure( + self, + truncation: bool, + instructions: Optional[str] = None, + max_output_tokens: Optional[int] = None, + ) -> tuple[CreateModelResponseProperties]: + """ + Configure advanced options for the OpenAI Chat Node. + + Note: + While `top_p` and `temperature` are listed as properties in the + spec, they are not supported for all models (e.g., o4-mini). 
+ They are not exposed as inputs at all to avoid having to manually + remove depending on model choice. + """ + return ( + CreateModelResponseProperties( + instructions=instructions, + truncation=truncation, + max_output_tokens=max_output_tokens, + ), + ) + + NODE_CLASS_MAPPINGS = { "OpenAIDalle2": OpenAIDalle2, "OpenAIDalle3": OpenAIDalle3, "OpenAIGPTImage1": OpenAIGPTImage1, + "OpenAIChatNode": OpenAIChatNode, + "OpenAIInputFiles": OpenAIInputFiles, + "OpenAIChatConfig": OpenAIChatConfig, } -# A dictionary that contains the friendly/humanly readable titles for the nodes NODE_DISPLAY_NAME_MAPPINGS = { "OpenAIDalle2": "OpenAI DALL·E 2", "OpenAIDalle3": "OpenAI DALL·E 3", "OpenAIGPTImage1": "OpenAI GPT Image 1", + "OpenAIChatNode": "OpenAI Chat", + "OpenAIInputFiles": "OpenAI Chat Input Files", + "OpenAIChatConfig": "OpenAI Chat Advanced Options", } diff --git a/comfy_api_nodes/nodes_rodin.py b/comfy_api_nodes/nodes_rodin.py new file mode 100644 index 00000000000..67f90478c1b --- /dev/null +++ b/comfy_api_nodes/nodes_rodin.py @@ -0,0 +1,462 @@ +""" +ComfyUI X Rodin3D(Deemos) API Nodes + +Rodin API docs: https://developer.hyper3d.ai/ + +""" + +from __future__ import annotations +from inspect import cleandoc +from comfy.comfy_types.node_typing import IO +import folder_paths as comfy_paths +import requests +import os +import datetime +import shutil +import time +import io +import logging +import math +from PIL import Image +from comfy_api_nodes.apis.rodin_api import ( + Rodin3DGenerateRequest, + Rodin3DGenerateResponse, + Rodin3DCheckStatusRequest, + Rodin3DCheckStatusResponse, + Rodin3DDownloadRequest, + Rodin3DDownloadResponse, + JobStatus, +) +from comfy_api_nodes.apis.client import ( + ApiEndpoint, + HttpMethod, + SynchronousOperation, + PollingOperation, +) + + +COMMON_PARAMETERS = { + "Seed": ( + IO.INT, + { + "default":0, + "min":0, + "max":65535, + "display":"number" + } + ), + "Material_Type": ( + IO.COMBO, + { + "options": ["PBR", "Shaded"], + "default": 
"PBR" + } + ), + "Polygon_count": ( + IO.COMBO, + { + "options": ["4K-Quad", "8K-Quad", "18K-Quad", "50K-Quad", "200K-Triangle"], + "default": "18K-Quad" + } + ) +} + +def create_task_error(response: Rodin3DGenerateResponse): + """Check if the response has error""" + return hasattr(response, "error") + + + +class Rodin3DAPI: + """ + Generate 3D Assets using Rodin API + """ + RETURN_TYPES = (IO.STRING,) + RETURN_NAMES = ("3D Model Path",) + CATEGORY = "api node/3d/Rodin" + DESCRIPTION = cleandoc(__doc__ or "") + FUNCTION = "api_call" + API_NODE = True + + def tensor_to_filelike(self, tensor, max_pixels: int = 2048*2048): + """ + Converts a PyTorch tensor to a file-like object. + + Args: + - tensor (torch.Tensor): A tensor representing an image of shape (H, W, C) + where C is the number of channels (3 for RGB), H is height, and W is width. + + Returns: + - io.BytesIO: A file-like object containing the image data. + """ + array = tensor.cpu().numpy() + array = (array * 255).astype('uint8') + image = Image.fromarray(array, 'RGB') + + original_width, original_height = image.size + original_pixels = original_width * original_height + if original_pixels > max_pixels: + scale = math.sqrt(max_pixels / original_pixels) + new_width = int(original_width * scale) + new_height = int(original_height * scale) + else: + new_width, new_height = original_width, original_height + + if new_width != original_width or new_height != original_height: + image = image.resize((new_width, new_height), Image.Resampling.LANCZOS) + + img_byte_arr = io.BytesIO() + image.save(img_byte_arr, format='PNG') # PNG is used for lossless compression + img_byte_arr.seek(0) + return img_byte_arr + + def check_rodin_status(self, response: Rodin3DCheckStatusResponse) -> str: + has_failed = any(job.status == JobStatus.Failed for job in response.jobs) + all_done = all(job.status == JobStatus.Done for job in response.jobs) + status_list = [str(job.status) for job in response.jobs] + logging.info(f"[ Rodin3D API - 
CheckStatus ] Generate Status: {status_list}") + if has_failed: + logging.error(f"[ Rodin3D API - CheckStatus ] Generate Failed: {status_list}, Please try again.") + raise Exception("[ Rodin3D API ] Generate Failed, Please Try again.") + elif all_done: + return "DONE" + else: + return "Generating" + + def CreateGenerateTask(self, images=None, seed=1, material="PBR", quality="medium", tier="Regular", mesh_mode="Quad", **kwargs): + if images == None: + raise Exception("Rodin 3D generate requires at least 1 image.") + if len(images) >= 5: + raise Exception("Rodin 3D generate requires up to 5 image.") + + path = "/proxy/rodin/api/v2/rodin" + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=path, + method=HttpMethod.POST, + request_model=Rodin3DGenerateRequest, + response_model=Rodin3DGenerateResponse, + ), + request=Rodin3DGenerateRequest( + seed=seed, + tier=tier, + material=material, + quality=quality, + mesh_mode=mesh_mode + ), + files=[ + ( + "images", + open(image, "rb") if isinstance(image, str) else self.tensor_to_filelike(image) + ) + for image in images if image is not None + ], + content_type = "multipart/form-data", + auth_kwargs=kwargs, + ) + + response = operation.execute() + + if create_task_error(response): + error_message = f"Rodin3D Create 3D generate Task Failed. 
Message: {response.message}, error: {response.error}" + logging.error(error_message) + raise Exception(error_message) + + logging.info("[ Rodin3D API - Submit Jobs ] Submit Generate Task Success!") + subscription_key = response.jobs.subscription_key + task_uuid = response.uuid + logging.info(f"[ Rodin3D API - Submit Jobs ] UUID: {task_uuid}") + return task_uuid, subscription_key + + def poll_for_task_status(self, subscription_key, **kwargs) -> Rodin3DCheckStatusResponse: + + path = "/proxy/rodin/api/v2/status" + + poll_operation = PollingOperation( + poll_endpoint=ApiEndpoint( + path = path, + method=HttpMethod.POST, + request_model=Rodin3DCheckStatusRequest, + response_model=Rodin3DCheckStatusResponse, + ), + request=Rodin3DCheckStatusRequest( + subscription_key = subscription_key + ), + completed_statuses=["DONE"], + failed_statuses=["FAILED"], + status_extractor=self.check_rodin_status, + poll_interval=3.0, + auth_kwargs=kwargs, + ) + + logging.info("[ Rodin3D API - CheckStatus ] Generate Start!") + + return poll_operation.execute() + + + + def GetRodinDownloadList(self, uuid, **kwargs) -> Rodin3DDownloadResponse: + logging.info("[ Rodin3D API - Downloading ] Generate Successfully!") + + path = "/proxy/rodin/api/v2/download" + operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=path, + method=HttpMethod.POST, + request_model=Rodin3DDownloadRequest, + response_model=Rodin3DDownloadResponse, + ), + request=Rodin3DDownloadRequest( + task_uuid=uuid + ), + auth_kwargs=kwargs + ) + + return operation.execute() + + def GetQualityAndMode(self, PolyCount): + if PolyCount == "200K-Triangle": + mesh_mode = "Raw" + quality = "medium" + else: + mesh_mode = "Quad" + if PolyCount == "4K-Quad": + quality = "extra-low" + elif PolyCount == "8K-Quad": + quality = "low" + elif PolyCount == "18K-Quad": + quality = "medium" + elif PolyCount == "50K-Quad": + quality = "high" + else: + quality = "medium" + + return mesh_mode, quality + + def DownLoadFiles(self, Url_List): 
+ Save_path = os.path.join(comfy_paths.get_output_directory(), "Rodin3D", datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")) + os.makedirs(Save_path, exist_ok=True) + model_file_path = None + for Item in Url_List.list: + url = Item.url + file_name = Item.name + file_path = os.path.join(Save_path, file_name) + if file_path.endswith(".glb"): + model_file_path = file_path + logging.info(f"[ Rodin3D API - download_files ] Downloading file: {file_path}") + max_retries = 5 + for attempt in range(max_retries): + try: + with requests.get(url, stream=True) as r: + r.raise_for_status() + with open(file_path, "wb") as f: + shutil.copyfileobj(r.raw, f) + break + except Exception as e: + logging.info(f"[ Rodin3D API - download_files ] Error downloading {file_path}:{e}") + if attempt < max_retries - 1: + logging.info("Retrying...") + time.sleep(2) + else: + logging.info(f"[ Rodin3D API - download_files ] Failed to download {file_path} after {max_retries} attempts.") + + return model_file_path + + +class Rodin3D_Regular(Rodin3DAPI): + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "Images": + ( + IO.IMAGE, + { + "forceInput":True, + } + ) + }, + "optional": { + **COMMON_PARAMETERS + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + def api_call( + self, + Images, + Seed, + Material_Type, + Polygon_count, + **kwargs + ): + tier = "Regular" + num_images = Images.shape[0] + m_images = [] + for i in range(num_images): + m_images.append(Images[i]) + mesh_mode, quality = self.GetQualityAndMode(Polygon_count) + task_uuid, subscription_key = self.CreateGenerateTask(images=m_images, seed=Seed, material=Material_Type, quality=quality, tier=tier, mesh_mode=mesh_mode, **kwargs) + self.poll_for_task_status(subscription_key, **kwargs) + Download_List = self.GetRodinDownloadList(task_uuid, **kwargs) + model = self.DownLoadFiles(Download_List) + + return (model,) + +class Rodin3D_Detail(Rodin3DAPI): + @classmethod + 
def INPUT_TYPES(s): + return { + "required": { + "Images": + ( + IO.IMAGE, + { + "forceInput":True, + } + ) + }, + "optional": { + **COMMON_PARAMETERS + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + def api_call( + self, + Images, + Seed, + Material_Type, + Polygon_count, + **kwargs + ): + tier = "Detail" + num_images = Images.shape[0] + m_images = [] + for i in range(num_images): + m_images.append(Images[i]) + mesh_mode, quality = self.GetQualityAndMode(Polygon_count) + task_uuid, subscription_key = self.CreateGenerateTask(images=m_images, seed=Seed, material=Material_Type, quality=quality, tier=tier, mesh_mode=mesh_mode, **kwargs) + self.poll_for_task_status(subscription_key, **kwargs) + Download_List = self.GetRodinDownloadList(task_uuid, **kwargs) + model = self.DownLoadFiles(Download_List) + + return (model,) + +class Rodin3D_Smooth(Rodin3DAPI): + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "Images": + ( + IO.IMAGE, + { + "forceInput":True, + } + ) + }, + "optional": { + **COMMON_PARAMETERS + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + def api_call( + self, + Images, + Seed, + Material_Type, + Polygon_count, + **kwargs + ): + tier = "Smooth" + num_images = Images.shape[0] + m_images = [] + for i in range(num_images): + m_images.append(Images[i]) + mesh_mode, quality = self.GetQualityAndMode(Polygon_count) + task_uuid, subscription_key = self.CreateGenerateTask(images=m_images, seed=Seed, material=Material_Type, quality=quality, tier=tier, mesh_mode=mesh_mode, **kwargs) + self.poll_for_task_status(subscription_key, **kwargs) + Download_List = self.GetRodinDownloadList(task_uuid, **kwargs) + model = self.DownLoadFiles(Download_List) + + return (model,) + +class Rodin3D_Sketch(Rodin3DAPI): + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "Images": + ( + IO.IMAGE, + { + "forceInput":True, + } + ) + 
}, + "optional": { + "Seed": + ( + IO.INT, + { + "default":0, + "min":0, + "max":65535, + "display":"number" + } + ) + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + def api_call( + self, + Images, + Seed, + **kwargs + ): + tier = "Sketch" + num_images = Images.shape[0] + m_images = [] + for i in range(num_images): + m_images.append(Images[i]) + material_type = "PBR" + quality = "medium" + mesh_mode = "Quad" + task_uuid, subscription_key = self.CreateGenerateTask(images=m_images, seed=Seed, material=material_type, quality=quality, tier=tier, mesh_mode=mesh_mode, **kwargs) + self.poll_for_task_status(subscription_key, **kwargs) + Download_List = self.GetRodinDownloadList(task_uuid, **kwargs) + model = self.DownLoadFiles(Download_List) + + return (model,) + +# A dictionary that contains all nodes you want to export with their names +# NOTE: names should be globally unique +NODE_CLASS_MAPPINGS = { + "Rodin3D_Regular": Rodin3D_Regular, + "Rodin3D_Detail": Rodin3D_Detail, + "Rodin3D_Smooth": Rodin3D_Smooth, + "Rodin3D_Sketch": Rodin3D_Sketch, +} + +# A dictionary that contains the friendly/humanly readable titles for the nodes +NODE_DISPLAY_NAME_MAPPINGS = { + "Rodin3D_Regular": "Rodin 3D Generate - Regular Generate", + "Rodin3D_Detail": "Rodin 3D Generate - Detail Generate", + "Rodin3D_Smooth": "Rodin 3D Generate - Smooth Generate", + "Rodin3D_Sketch": "Rodin 3D Generate - Sketch Generate", +} diff --git a/comfy_api_nodes/nodes_runway.py b/comfy_api_nodes/nodes_runway.py new file mode 100644 index 00000000000..af4b321f96d --- /dev/null +++ b/comfy_api_nodes/nodes_runway.py @@ -0,0 +1,635 @@ +"""Runway API Nodes + +API Docs: + - https://docs.dev.runwayml.com/api/#tag/Task-management/paths/~1v1~1tasks~1%7Bid%7D/delete + +User Guides: + - https://help.runwayml.com/hc/en-us/sections/30265301423635-Gen-3-Alpha + - https://help.runwayml.com/hc/en-us/articles/37327109429011-Creating-with-Gen-4-Video + - 
https://help.runwayml.com/hc/en-us/articles/33927968552339-Creating-with-Act-One-on-Gen-3-Alpha-and-Turbo + - https://help.runwayml.com/hc/en-us/articles/34170748696595-Creating-with-Keyframes-on-Gen-3 + +""" + +from typing import Union, Optional, Any +from enum import Enum + +import torch + +from comfy_api_nodes.apis import ( + RunwayImageToVideoRequest, + RunwayImageToVideoResponse, + RunwayTaskStatusResponse as TaskStatusResponse, + RunwayTaskStatusEnum as TaskStatus, + RunwayModelEnum as Model, + RunwayDurationEnum as Duration, + RunwayAspectRatioEnum as AspectRatio, + RunwayPromptImageObject, + RunwayPromptImageDetailedObject, + RunwayTextToImageRequest, + RunwayTextToImageResponse, + Model4, + ReferenceImage, + RunwayTextToImageAspectRatioEnum, +) +from comfy_api_nodes.apis.client import ( + ApiEndpoint, + HttpMethod, + SynchronousOperation, + PollingOperation, + EmptyRequest, +) +from comfy_api_nodes.apinode_utils import ( + upload_images_to_comfyapi, + download_url_to_video_output, + image_tensor_pair_to_batch, + validate_string, + download_url_to_image_tensor, +) +from comfy_api_nodes.mapper_utils import model_field_to_node_input +from comfy_api.input_impl import VideoFromFile +from comfy.comfy_types.node_typing import IO, ComfyNodeABC + +PATH_IMAGE_TO_VIDEO = "/proxy/runway/image_to_video" +PATH_TEXT_TO_IMAGE = "/proxy/runway/text_to_image" +PATH_GET_TASK_STATUS = "/proxy/runway/tasks" + +AVERAGE_DURATION_I2V_SECONDS = 64 +AVERAGE_DURATION_FLF_SECONDS = 256 +AVERAGE_DURATION_T2I_SECONDS = 41 + + +class RunwayApiError(Exception): + """Base exception for Runway API errors.""" + + pass + + +class RunwayGen4TurboAspectRatio(str, Enum): + """Aspect ratios supported for Image to Video API when using gen4_turbo model.""" + + field_1280_720 = "1280:720" + field_720_1280 = "720:1280" + field_1104_832 = "1104:832" + field_832_1104 = "832:1104" + field_960_960 = "960:960" + field_1584_672 = "1584:672" + + +class RunwayGen3aAspectRatio(str, Enum): + """Aspect ratios 
supported for Image to Video API when using gen3a_turbo model.""" + + field_768_1280 = "768:1280" + field_1280_768 = "1280:768" + + +def get_video_url_from_task_status(response: TaskStatusResponse) -> Union[str, None]: + """Returns the video URL from the task status response if it exists.""" + if response.output and len(response.output) > 0: + return response.output[0] + return None + + +# TODO: replace with updated image validation utils (upstream) +def validate_input_image(image: torch.Tensor) -> bool: + """ + Validate the input image is within the size limits for the Runway API. + See: https://docs.dev.runwayml.com/assets/inputs/#common-error-reasons + """ + return image.shape[2] < 8000 and image.shape[1] < 8000 + + +def poll_until_finished( + auth_kwargs: dict[str, str], + api_endpoint: ApiEndpoint[Any, TaskStatusResponse], + estimated_duration: Optional[int] = None, + node_id: Optional[str] = None, +) -> TaskStatusResponse: + """Polls the Runway API endpoint until the task reaches a terminal state, then returns the response.""" + return PollingOperation( + poll_endpoint=api_endpoint, + completed_statuses=[ + TaskStatus.SUCCEEDED.value, + ], + failed_statuses=[ + TaskStatus.FAILED.value, + TaskStatus.CANCELLED.value, + ], + status_extractor=lambda response: (response.status.value), + auth_kwargs=auth_kwargs, + result_url_extractor=get_video_url_from_task_status, + estimated_duration=estimated_duration, + node_id=node_id, + progress_extractor=extract_progress_from_task_status, + ).execute() + + +def extract_progress_from_task_status( + response: TaskStatusResponse, +) -> Union[float, None]: + if hasattr(response, "progress") and response.progress is not None: + return response.progress * 100 + return None + + +def get_image_url_from_task_status(response: TaskStatusResponse) -> Union[str, None]: + """Returns the image URL from the task status response if it exists.""" + if response.output and len(response.output) > 0: + return response.output[0] + return None + + 
+class RunwayVideoGenNode(ComfyNodeABC): + """Runway Video Node Base.""" + + RETURN_TYPES = ("VIDEO",) + FUNCTION = "api_call" + CATEGORY = "api node/video/Runway" + API_NODE = True + + def validate_task_created(self, response: RunwayImageToVideoResponse) -> bool: + """ + Validate the task creation response from the Runway API matches + expected format. + """ + if not bool(response.id): + raise RunwayApiError("Invalid initial response from Runway API.") + return True + + def validate_response(self, response: RunwayImageToVideoResponse) -> bool: + """ + Validate the successful task status response from the Runway API + matches expected format. + """ + if not response.output or len(response.output) == 0: + raise RunwayApiError( + "Runway task succeeded but no video data found in response." + ) + return True + + def get_response( + self, task_id: str, auth_kwargs: dict[str, str], node_id: Optional[str] = None + ) -> RunwayImageToVideoResponse: + """Poll the task status until it is finished then get the response.""" + return poll_until_finished( + auth_kwargs, + ApiEndpoint( + path=f"{PATH_GET_TASK_STATUS}/{task_id}", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=TaskStatusResponse, + ), + estimated_duration=AVERAGE_DURATION_FLF_SECONDS, + node_id=node_id, + ) + + def generate_video( + self, + request: RunwayImageToVideoRequest, + auth_kwargs: dict[str, str], + node_id: Optional[str] = None, + ) -> tuple[VideoFromFile]: + initial_operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=PATH_IMAGE_TO_VIDEO, + method=HttpMethod.POST, + request_model=RunwayImageToVideoRequest, + response_model=RunwayImageToVideoResponse, + ), + request=request, + auth_kwargs=auth_kwargs, + ) + + initial_response = initial_operation.execute() + self.validate_task_created(initial_response) + task_id = initial_response.id + + final_response = self.get_response(task_id, auth_kwargs, node_id) + self.validate_response(final_response) + + video_url = 
get_video_url_from_task_status(final_response) + return (download_url_to_video_output(video_url),) + + +class RunwayImageToVideoNodeGen3a(RunwayVideoGenNode): + """Runway Image to Video Node using Gen3a Turbo model.""" + + DESCRIPTION = "Generate a video from a single starting frame using Gen3a Turbo model. Before diving in, review these best practices to ensure that your input selections will set your generation up for success: https://help.runwayml.com/hc/en-us/articles/33927968552339-Creating-with-Act-One-on-Gen-3-Alpha-and-Turbo." + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "prompt": model_field_to_node_input( + IO.STRING, RunwayImageToVideoRequest, "promptText", multiline=True + ), + "start_frame": ( + IO.IMAGE, + {"tooltip": "Start frame to be used for the video"}, + ), + "duration": model_field_to_node_input( + IO.COMBO, RunwayImageToVideoRequest, "duration", enum_type=Duration + ), + "ratio": model_field_to_node_input( + IO.COMBO, + RunwayImageToVideoRequest, + "ratio", + enum_type=RunwayGen3aAspectRatio, + ), + "seed": model_field_to_node_input( + IO.INT, + RunwayImageToVideoRequest, + "seed", + control_after_generate=True, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + "unique_id": "UNIQUE_ID", + }, + } + + def api_call( + self, + prompt: str, + start_frame: torch.Tensor, + duration: str, + ratio: str, + seed: int, + unique_id: Optional[str] = None, + **kwargs, + ) -> tuple[VideoFromFile]: + # Validate inputs + validate_string(prompt, min_length=1) + validate_input_image(start_frame) + + # Upload image + download_urls = upload_images_to_comfyapi( + start_frame, + max_images=1, + mime_type="image/png", + auth_kwargs=kwargs, + ) + if len(download_urls) != 1: + raise RunwayApiError("Failed to upload one or more images to comfy api.") + + return self.generate_video( + RunwayImageToVideoRequest( + promptText=prompt, + seed=seed, + model=Model("gen3a_turbo"), + 
duration=Duration(duration), + ratio=AspectRatio(ratio), + promptImage=RunwayPromptImageObject( + root=[ + RunwayPromptImageDetailedObject( + uri=str(download_urls[0]), position="first" + ) + ] + ), + ), + auth_kwargs=kwargs, + node_id=unique_id, + ) + + +class RunwayImageToVideoNodeGen4(RunwayVideoGenNode): + """Runway Image to Video Node using Gen4 Turbo model.""" + + DESCRIPTION = "Generate a video from a single starting frame using Gen4 Turbo model. Before diving in, review these best practices to ensure that your input selections will set your generation up for success: https://help.runwayml.com/hc/en-us/articles/37327109429011-Creating-with-Gen-4-Video." + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "prompt": model_field_to_node_input( + IO.STRING, RunwayImageToVideoRequest, "promptText", multiline=True + ), + "start_frame": ( + IO.IMAGE, + {"tooltip": "Start frame to be used for the video"}, + ), + "duration": model_field_to_node_input( + IO.COMBO, RunwayImageToVideoRequest, "duration", enum_type=Duration + ), + "ratio": model_field_to_node_input( + IO.COMBO, + RunwayImageToVideoRequest, + "ratio", + enum_type=RunwayGen4TurboAspectRatio, + ), + "seed": model_field_to_node_input( + IO.INT, + RunwayImageToVideoRequest, + "seed", + control_after_generate=True, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + "unique_id": "UNIQUE_ID", + }, + } + + def api_call( + self, + prompt: str, + start_frame: torch.Tensor, + duration: str, + ratio: str, + seed: int, + unique_id: Optional[str] = None, + **kwargs, + ) -> tuple[VideoFromFile]: + # Validate inputs + validate_string(prompt, min_length=1) + validate_input_image(start_frame) + + # Upload image + download_urls = upload_images_to_comfyapi( + start_frame, + max_images=1, + mime_type="image/png", + auth_kwargs=kwargs, + ) + if len(download_urls) != 1: + raise RunwayApiError("Failed to upload one or more images to comfy api.") + + return 
self.generate_video( + RunwayImageToVideoRequest( + promptText=prompt, + seed=seed, + model=Model("gen4_turbo"), + duration=Duration(duration), + ratio=AspectRatio(ratio), + promptImage=RunwayPromptImageObject( + root=[ + RunwayPromptImageDetailedObject( + uri=str(download_urls[0]), position="first" + ) + ] + ), + ), + auth_kwargs=kwargs, + node_id=unique_id, + ) + + +class RunwayFirstLastFrameNode(RunwayVideoGenNode): + """Runway First-Last Frame Node.""" + + DESCRIPTION = "Upload first and last keyframes, draft a prompt, and generate a video. More complex transitions, such as cases where the Last frame is completely different from the First frame, may benefit from the longer 10s duration. This would give the generation more time to smoothly transition between the two inputs. Before diving in, review these best practices to ensure that your input selections will set your generation up for success: https://help.runwayml.com/hc/en-us/articles/34170748696595-Creating-with-Keyframes-on-Gen-3." + + def get_response( + self, task_id: str, auth_kwargs: dict[str, str], node_id: Optional[str] = None + ) -> RunwayImageToVideoResponse: + return poll_until_finished( + auth_kwargs, + ApiEndpoint( + path=f"{PATH_GET_TASK_STATUS}/{task_id}", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=TaskStatusResponse, + ), + estimated_duration=AVERAGE_DURATION_FLF_SECONDS, + node_id=node_id, + ) + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "prompt": model_field_to_node_input( + IO.STRING, RunwayImageToVideoRequest, "promptText", multiline=True + ), + "start_frame": ( + IO.IMAGE, + {"tooltip": "Start frame to be used for the video"}, + ), + "end_frame": ( + IO.IMAGE, + { + "tooltip": "End frame to be used for the video. Supported for gen3a_turbo only." 
+ }, + ), + "duration": model_field_to_node_input( + IO.COMBO, RunwayImageToVideoRequest, "duration", enum_type=Duration + ), + "ratio": model_field_to_node_input( + IO.COMBO, + RunwayImageToVideoRequest, + "ratio", + enum_type=RunwayGen3aAspectRatio, + ), + "seed": model_field_to_node_input( + IO.INT, + RunwayImageToVideoRequest, + "seed", + control_after_generate=True, + ), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "unique_id": "UNIQUE_ID", + "comfy_api_key": "API_KEY_COMFY_ORG", + }, + } + + def api_call( + self, + prompt: str, + start_frame: torch.Tensor, + end_frame: torch.Tensor, + duration: str, + ratio: str, + seed: int, + unique_id: Optional[str] = None, + **kwargs, + ) -> tuple[VideoFromFile]: + # Validate inputs + validate_string(prompt, min_length=1) + validate_input_image(start_frame) + validate_input_image(end_frame) + + # Upload images + stacked_input_images = image_tensor_pair_to_batch(start_frame, end_frame) + download_urls = upload_images_to_comfyapi( + stacked_input_images, + max_images=2, + mime_type="image/png", + auth_kwargs=kwargs, + ) + if len(download_urls) != 2: + raise RunwayApiError("Failed to upload one or more images to comfy api.") + + return self.generate_video( + RunwayImageToVideoRequest( + promptText=prompt, + seed=seed, + model=Model("gen3a_turbo"), + duration=Duration(duration), + ratio=AspectRatio(ratio), + promptImage=RunwayPromptImageObject( + root=[ + RunwayPromptImageDetailedObject( + uri=str(download_urls[0]), position="first" + ), + RunwayPromptImageDetailedObject( + uri=str(download_urls[1]), position="last" + ), + ] + ), + ), + auth_kwargs=kwargs, + node_id=unique_id, + ) + + +class RunwayTextToImageNode(ComfyNodeABC): + """Runway Text to Image Node.""" + + RETURN_TYPES = ("IMAGE",) + FUNCTION = "api_call" + CATEGORY = "api node/image/Runway" + API_NODE = True + DESCRIPTION = "Generate an image from a text prompt using Runway's Gen 4 model. 
You can also include reference images to guide the generation." + + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "prompt": model_field_to_node_input( + IO.STRING, RunwayTextToImageRequest, "promptText", multiline=True + ), + "ratio": model_field_to_node_input( + IO.COMBO, + RunwayTextToImageRequest, + "ratio", + enum_type=RunwayTextToImageAspectRatioEnum, + ), + }, + "optional": { + "reference_image": ( + IO.IMAGE, + {"tooltip": "Optional reference image to guide the generation"}, + ) + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + "unique_id": "UNIQUE_ID", + }, + } + + def validate_task_created(self, response: RunwayTextToImageResponse) -> bool: + """ + Validate the task creation response from the Runway API matches + expected format. + """ + if not bool(response.id): + raise RunwayApiError("Invalid initial response from Runway API.") + return True + + def validate_response(self, response: TaskStatusResponse) -> bool: + """ + Validate the successful task status response from the Runway API + matches expected format. + """ + if not response.output or len(response.output) == 0: + raise RunwayApiError( + "Runway task succeeded but no image data found in response." 
+ ) + return True + + def get_response( + self, task_id: str, auth_kwargs: dict[str, str], node_id: Optional[str] = None + ) -> TaskStatusResponse: + """Poll the task status until it is finished then get the response.""" + return poll_until_finished( + auth_kwargs, + ApiEndpoint( + path=f"{PATH_GET_TASK_STATUS}/{task_id}", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=TaskStatusResponse, + ), + estimated_duration=AVERAGE_DURATION_T2I_SECONDS, + node_id=node_id, + ) + + def api_call( + self, + prompt: str, + ratio: str, + reference_image: Optional[torch.Tensor] = None, + unique_id: Optional[str] = None, + **kwargs, + ) -> tuple[torch.Tensor]: + # Validate inputs + validate_string(prompt, min_length=1) + + # Prepare reference images if provided + reference_images = None + if reference_image is not None: + validate_input_image(reference_image) + download_urls = upload_images_to_comfyapi( + reference_image, + max_images=1, + mime_type="image/png", + auth_kwargs=kwargs, + ) + if len(download_urls) != 1: + raise RunwayApiError("Failed to upload reference image to comfy api.") + + reference_images = [ReferenceImage(uri=str(download_urls[0]))] + + # Create request + request = RunwayTextToImageRequest( + promptText=prompt, + model=Model4.gen4_image, + ratio=ratio, + referenceImages=reference_images, + ) + + # Execute initial request + initial_operation = SynchronousOperation( + endpoint=ApiEndpoint( + path=PATH_TEXT_TO_IMAGE, + method=HttpMethod.POST, + request_model=RunwayTextToImageRequest, + response_model=RunwayTextToImageResponse, + ), + request=request, + auth_kwargs=kwargs, + ) + + initial_response = initial_operation.execute() + self.validate_task_created(initial_response) + task_id = initial_response.id + + # Poll for completion + final_response = self.get_response( + task_id, auth_kwargs=kwargs, node_id=unique_id + ) + self.validate_response(final_response) + + # Download and return image + image_url = 
get_image_url_from_task_status(final_response) + return (download_url_to_image_tensor(image_url),) + + +NODE_CLASS_MAPPINGS = { + "RunwayFirstLastFrameNode": RunwayFirstLastFrameNode, + "RunwayImageToVideoNodeGen3a": RunwayImageToVideoNodeGen3a, + "RunwayImageToVideoNodeGen4": RunwayImageToVideoNodeGen4, + "RunwayTextToImageNode": RunwayTextToImageNode, +} + +NODE_DISPLAY_NAME_MAPPINGS = { + "RunwayFirstLastFrameNode": "Runway First-Last-Frame to Video", + "RunwayImageToVideoNodeGen3a": "Runway Image to Video (Gen3a Turbo)", + "RunwayImageToVideoNodeGen4": "Runway Image to Video (Gen4 Turbo)", + "RunwayTextToImageNode": "Runway Text to Image", +} diff --git a/comfy_api_nodes/nodes_tripo.py b/comfy_api_nodes/nodes_tripo.py new file mode 100644 index 00000000000..65f3b21f5cc --- /dev/null +++ b/comfy_api_nodes/nodes_tripo.py @@ -0,0 +1,574 @@ +import os +from folder_paths import get_output_directory +from comfy_api_nodes.mapper_utils import model_field_to_node_input +from comfy.comfy_types.node_typing import IO +from comfy_api_nodes.apis import ( + TripoOrientation, + TripoModelVersion, +) +from comfy_api_nodes.apis.tripo_api import ( + TripoTaskType, + TripoStyle, + TripoFileReference, + TripoFileEmptyReference, + TripoUrlReference, + TripoTaskResponse, + TripoTaskStatus, + TripoTextToModelRequest, + TripoImageToModelRequest, + TripoMultiviewToModelRequest, + TripoTextureModelRequest, + TripoRefineModelRequest, + TripoAnimateRigRequest, + TripoAnimateRetargetRequest, + TripoConvertModelRequest, +) + +from comfy_api_nodes.apis.client import ( + ApiEndpoint, + HttpMethod, + SynchronousOperation, + PollingOperation, + EmptyRequest, +) +from comfy_api_nodes.apinode_utils import ( + upload_images_to_comfyapi, + download_url_to_bytesio, +) + + +def upload_image_to_tripo(image, **kwargs): + urls = upload_images_to_comfyapi(image, max_images=1, auth_kwargs=kwargs) + return TripoFileReference(TripoUrlReference(url=urls[0], type="jpeg")) + +def 
get_model_url_from_response(response: TripoTaskResponse) -> str: + if response.data is not None: + for key in ["pbr_model", "model", "base_model"]: + if getattr(response.data.output, key, None) is not None: + return getattr(response.data.output, key) + raise RuntimeError(f"Failed to get model url from response: {response}") + + +def poll_until_finished( + kwargs: dict[str, str], + response: TripoTaskResponse, +) -> tuple[str, str]: + """Polls the Tripo API endpoint until the task reaches a terminal state, then returns the response.""" + if response.code != 0: + raise RuntimeError(f"Failed to generate mesh: {response.error}") + task_id = response.data.task_id + response_poll = PollingOperation( + poll_endpoint=ApiEndpoint( + path=f"/proxy/tripo/v2/openapi/task/{task_id}", + method=HttpMethod.GET, + request_model=EmptyRequest, + response_model=TripoTaskResponse, + ), + completed_statuses=[TripoTaskStatus.SUCCESS], + failed_statuses=[ + TripoTaskStatus.FAILED, + TripoTaskStatus.CANCELLED, + TripoTaskStatus.UNKNOWN, + TripoTaskStatus.BANNED, + TripoTaskStatus.EXPIRED, + ], + status_extractor=lambda x: x.data.status, + auth_kwargs=kwargs, + node_id=kwargs["unique_id"], + result_url_extractor=get_model_url_from_response, + progress_extractor=lambda x: x.data.progress, + ).execute() + if response_poll.data.status == TripoTaskStatus.SUCCESS: + url = get_model_url_from_response(response_poll) + bytesio = download_url_to_bytesio(url) + # Save the downloaded model file + model_file = f"tripo_model_{task_id}.glb" + with open(os.path.join(get_output_directory(), model_file), "wb") as f: + f.write(bytesio.getvalue()) + return model_file, task_id + raise RuntimeError(f"Failed to generate mesh: {response_poll}") + +class TripoTextToModelNode: + """ + Generates 3D models synchronously based on a text prompt using Tripo's API. 
+ """ + AVERAGE_DURATION = 80 + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "prompt": ("STRING", {"multiline": True}), + }, + "optional": { + "negative_prompt": ("STRING", {"multiline": True}), + "model_version": model_field_to_node_input(IO.COMBO, TripoTextToModelRequest, "model_version", enum_type=TripoModelVersion), + "style": model_field_to_node_input(IO.COMBO, TripoTextToModelRequest, "style", enum_type=TripoStyle, default="None"), + "texture": ("BOOLEAN", {"default": True}), + "pbr": ("BOOLEAN", {"default": True}), + "image_seed": ("INT", {"default": 42}), + "model_seed": ("INT", {"default": 42}), + "texture_seed": ("INT", {"default": 42}), + "texture_quality": (["standard", "detailed"], {"default": "standard"}), + "face_limit": ("INT", {"min": -1, "max": 500000, "default": -1}), + "quad": ("BOOLEAN", {"default": False}) + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + "unique_id": "UNIQUE_ID", + }, + } + + RETURN_TYPES = ("STRING", "MODEL_TASK_ID",) + RETURN_NAMES = ("model_file", "model task_id") + FUNCTION = "generate_mesh" + CATEGORY = "api node/3d/Tripo" + API_NODE = True + OUTPUT_NODE = True + + def generate_mesh(self, prompt, negative_prompt=None, model_version=None, style=None, texture=None, pbr=None, image_seed=None, model_seed=None, texture_seed=None, texture_quality=None, face_limit=None, quad=None, **kwargs): + style_enum = None if style == "None" else style + if not prompt: + raise RuntimeError("Prompt is required") + response = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/tripo/v2/openapi/task", + method=HttpMethod.POST, + request_model=TripoTextToModelRequest, + response_model=TripoTaskResponse, + ), + request=TripoTextToModelRequest( + type=TripoTaskType.TEXT_TO_MODEL, + prompt=prompt, + negative_prompt=negative_prompt if negative_prompt else None, + model_version=model_version, + style=style_enum, + texture=texture, + pbr=pbr, + image_seed=image_seed, + 
model_seed=model_seed, + texture_seed=texture_seed, + texture_quality=texture_quality, + face_limit=face_limit, + auto_size=True, + quad=quad + ), + auth_kwargs=kwargs, + ).execute() + return poll_until_finished(kwargs, response) + +class TripoImageToModelNode: + """ + Generates 3D models synchronously based on a single image using Tripo's API. + """ + AVERAGE_DURATION = 80 + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": ("IMAGE",), + }, + "optional": { + "model_version": model_field_to_node_input(IO.COMBO, TripoImageToModelRequest, "model_version", enum_type=TripoModelVersion), + "style": model_field_to_node_input(IO.COMBO, TripoTextToModelRequest, "style", enum_type=TripoStyle, default="None"), + "texture": ("BOOLEAN", {"default": True}), + "pbr": ("BOOLEAN", {"default": True}), + "model_seed": ("INT", {"default": 42}), + "orientation": model_field_to_node_input(IO.COMBO, TripoImageToModelRequest, "orientation", enum_type=TripoOrientation), + "texture_seed": ("INT", {"default": 42}), + "texture_quality": (["standard", "detailed"], {"default": "standard"}), + "texture_alignment": (["original_image", "geometry"], {"default": "original_image"}), + "face_limit": ("INT", {"min": -1, "max": 500000, "default": -1}), + "quad": ("BOOLEAN", {"default": False}) + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + "unique_id": "UNIQUE_ID", + }, + } + + RETURN_TYPES = ("STRING", "MODEL_TASK_ID",) + RETURN_NAMES = ("model_file", "model task_id") + FUNCTION = "generate_mesh" + CATEGORY = "api node/3d/Tripo" + API_NODE = True + OUTPUT_NODE = True + + def generate_mesh(self, image, model_version=None, style=None, texture=None, pbr=None, model_seed=None, orientation=None, texture_alignment=None, texture_seed=None, texture_quality=None, face_limit=None, quad=None, **kwargs): + style_enum = None if style == "None" else style + if image is None: + raise RuntimeError("Image is required") + tripo_file = 
upload_image_to_tripo(image, **kwargs) + response = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/tripo/v2/openapi/task", + method=HttpMethod.POST, + request_model=TripoImageToModelRequest, + response_model=TripoTaskResponse, + ), + request=TripoImageToModelRequest( + type=TripoTaskType.IMAGE_TO_MODEL, + file=tripo_file, + model_version=model_version, + style=style_enum, + texture=texture, + pbr=pbr, + model_seed=model_seed, + orientation=orientation, + texture_alignment=texture_alignment, + texture_seed=texture_seed, + texture_quality=texture_quality, + face_limit=face_limit, + auto_size=True, + quad=quad + ), + auth_kwargs=kwargs, + ).execute() + return poll_until_finished(kwargs, response) + +class TripoMultiviewToModelNode: + """ + Generates 3D models synchronously based on up to four images (front, left, back, right) using Tripo's API. + """ + AVERAGE_DURATION = 80 + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "image": ("IMAGE",), + }, + "optional": { + "image_left": ("IMAGE",), + "image_back": ("IMAGE",), + "image_right": ("IMAGE",), + "model_version": model_field_to_node_input(IO.COMBO, TripoMultiviewToModelRequest, "model_version", enum_type=TripoModelVersion), + "orientation": model_field_to_node_input(IO.COMBO, TripoImageToModelRequest, "orientation", enum_type=TripoOrientation), + "texture": ("BOOLEAN", {"default": True}), + "pbr": ("BOOLEAN", {"default": True}), + "model_seed": ("INT", {"default": 42}), + "texture_seed": ("INT", {"default": 42}), + "texture_quality": (["standard", "detailed"], {"default": "standard"}), + "texture_alignment": (["original_image", "geometry"], {"default": "original_image"}), + "face_limit": ("INT", {"min": -1, "max": 500000, "default": -1}), + "quad": ("BOOLEAN", {"default": False}) + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + "unique_id": "UNIQUE_ID", + }, + } + + RETURN_TYPES = ("STRING", "MODEL_TASK_ID",) + RETURN_NAMES = 
("model_file", "model task_id") + FUNCTION = "generate_mesh" + CATEGORY = "api node/3d/Tripo" + API_NODE = True + OUTPUT_NODE = True + + def generate_mesh(self, image, image_left=None, image_back=None, image_right=None, model_version=None, orientation=None, texture=None, pbr=None, model_seed=None, texture_seed=None, texture_quality=None, texture_alignment=None, face_limit=None, quad=None, **kwargs): + if image is None: + raise RuntimeError("front image for multiview is required") + images = [] + image_dict = { + "image": image, + "image_left": image_left, + "image_back": image_back, + "image_right": image_right + } + if image_left is None and image_back is None and image_right is None: + raise RuntimeError("At least one of left, back, or right image must be provided for multiview") + for image_name in ["image", "image_left", "image_back", "image_right"]: + image_ = image_dict[image_name] + if image_ is not None: + tripo_file = upload_image_to_tripo(image_, **kwargs) + images.append(tripo_file) + else: + images.append(TripoFileEmptyReference()) + response = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/tripo/v2/openapi/task", + method=HttpMethod.POST, + request_model=TripoMultiviewToModelRequest, + response_model=TripoTaskResponse, + ), + request=TripoMultiviewToModelRequest( + type=TripoTaskType.MULTIVIEW_TO_MODEL, + files=images, + model_version=model_version, + orientation=orientation, + texture=texture, + pbr=pbr, + model_seed=model_seed, + texture_seed=texture_seed, + texture_quality=texture_quality, + texture_alignment=texture_alignment, + face_limit=face_limit, + quad=quad, + ), + auth_kwargs=kwargs, + ).execute() + return poll_until_finished(kwargs, response) + +class TripoTextureNode: + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "model_task_id": ("MODEL_TASK_ID",), + }, + "optional": { + "texture": ("BOOLEAN", {"default": True}), + "pbr": ("BOOLEAN", {"default": True}), + "texture_seed": ("INT", {"default": 42}), + 
"texture_quality": (["standard", "detailed"], {"default": "standard"}), + "texture_alignment": (["original_image", "geometry"], {"default": "original_image"}), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + "unique_id": "UNIQUE_ID", + }, + } + + RETURN_TYPES = ("STRING", "MODEL_TASK_ID",) + RETURN_NAMES = ("model_file", "model task_id") + FUNCTION = "generate_mesh" + CATEGORY = "api node/3d/Tripo" + API_NODE = True + OUTPUT_NODE = True + AVERAGE_DURATION = 80 + + def generate_mesh(self, model_task_id, texture=None, pbr=None, texture_seed=None, texture_quality=None, texture_alignment=None, **kwargs): + response = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/tripo/v2/openapi/task", + method=HttpMethod.POST, + request_model=TripoTextureModelRequest, + response_model=TripoTaskResponse, + ), + request=TripoTextureModelRequest( + original_model_task_id=model_task_id, + texture=texture, + pbr=pbr, + texture_seed=texture_seed, + texture_quality=texture_quality, + texture_alignment=texture_alignment + ), + auth_kwargs=kwargs, + ).execute() + return poll_until_finished(kwargs, response) + + +class TripoRefineNode: + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "model_task_id": ("MODEL_TASK_ID", { + "tooltip": "Must be a v1.4 Tripo model" + }), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + "unique_id": "UNIQUE_ID", + }, + } + + DESCRIPTION = "Refine a draft model created by v1.4 Tripo models only." 
+ + RETURN_TYPES = ("STRING", "MODEL_TASK_ID",) + RETURN_NAMES = ("model_file", "model task_id") + FUNCTION = "generate_mesh" + CATEGORY = "api node/3d/Tripo" + API_NODE = True + OUTPUT_NODE = True + AVERAGE_DURATION = 240 + + def generate_mesh(self, model_task_id, **kwargs): + response = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/tripo/v2/openapi/task", + method=HttpMethod.POST, + request_model=TripoRefineModelRequest, + response_model=TripoTaskResponse, + ), + request=TripoRefineModelRequest( + draft_model_task_id=model_task_id + ), + auth_kwargs=kwargs, + ).execute() + return poll_until_finished(kwargs, response) + + +class TripoRigNode: + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "original_model_task_id": ("MODEL_TASK_ID",), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + "unique_id": "UNIQUE_ID", + }, + } + + RETURN_TYPES = ("STRING", "RIG_TASK_ID") + RETURN_NAMES = ("model_file", "rig task_id") + FUNCTION = "generate_mesh" + CATEGORY = "api node/3d/Tripo" + API_NODE = True + OUTPUT_NODE = True + AVERAGE_DURATION = 180 + + def generate_mesh(self, original_model_task_id, **kwargs): + response = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/tripo/v2/openapi/task", + method=HttpMethod.POST, + request_model=TripoAnimateRigRequest, + response_model=TripoTaskResponse, + ), + request=TripoAnimateRigRequest( + original_model_task_id=original_model_task_id, + out_format="glb", + spec="tripo" + ), + auth_kwargs=kwargs, + ).execute() + return poll_until_finished(kwargs, response) + +class TripoRetargetNode: + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "original_model_task_id": ("RIG_TASK_ID",), + "animation": ([ + "preset:idle", + "preset:walk", + "preset:climb", + "preset:jump", + "preset:slash", + "preset:shoot", + "preset:hurt", + "preset:fall", + "preset:turn", + ],), + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + 
"comfy_api_key": "API_KEY_COMFY_ORG", + "unique_id": "UNIQUE_ID", + }, + } + + RETURN_TYPES = ("STRING", "RETARGET_TASK_ID") + RETURN_NAMES = ("model_file", "retarget task_id") + FUNCTION = "generate_mesh" + CATEGORY = "api node/3d/Tripo" + API_NODE = True + OUTPUT_NODE = True + AVERAGE_DURATION = 30 + + def generate_mesh(self, animation, original_model_task_id, **kwargs): + response = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/tripo/v2/openapi/task", + method=HttpMethod.POST, + request_model=TripoAnimateRetargetRequest, + response_model=TripoTaskResponse, + ), + request=TripoAnimateRetargetRequest( + original_model_task_id=original_model_task_id, + animation=animation, + out_format="glb", + bake_animation=True + ), + auth_kwargs=kwargs, + ).execute() + return poll_until_finished(kwargs, response) + +class TripoConversionNode: + @classmethod + def INPUT_TYPES(s): + return { + "required": { + "original_model_task_id": ("MODEL_TASK_ID,RIG_TASK_ID,RETARGET_TASK_ID",), + "format": (["GLTF", "USDZ", "FBX", "OBJ", "STL", "3MF"],), + }, + "optional": { + "quad": ("BOOLEAN", {"default": False}), + "face_limit": ("INT", {"min": -1, "max": 500000, "default": -1}), + "texture_size": ("INT", {"min": 128, "max": 4096, "default": 4096}), + "texture_format": (["BMP", "DPX", "HDR", "JPEG", "OPEN_EXR", "PNG", "TARGA", "TIFF", "WEBP"], {"default": "JPEG"}) + }, + "hidden": { + "auth_token": "AUTH_TOKEN_COMFY_ORG", + "comfy_api_key": "API_KEY_COMFY_ORG", + "unique_id": "UNIQUE_ID", + }, + } + + @classmethod + def VALIDATE_INPUTS(cls, input_types): + # The min and max of input1 and input2 are still validated because + # we didn't take `input1` or `input2` as arguments + if input_types["original_model_task_id"] not in ("MODEL_TASK_ID", "RIG_TASK_ID", "RETARGET_TASK_ID"): + return "original_model_task_id must be MODEL_TASK_ID, RIG_TASK_ID or RETARGET_TASK_ID type" + return True + + RETURN_TYPES = () + FUNCTION = "generate_mesh" + CATEGORY = "api node/3d/Tripo" + 
API_NODE = True + OUTPUT_NODE = True + AVERAGE_DURATION = 30 + + def generate_mesh(self, original_model_task_id, format, quad, face_limit, texture_size, texture_format, **kwargs): + if not original_model_task_id: + raise RuntimeError("original_model_task_id is required") + response = SynchronousOperation( + endpoint=ApiEndpoint( + path="/proxy/tripo/v2/openapi/task", + method=HttpMethod.POST, + request_model=TripoConvertModelRequest, + response_model=TripoTaskResponse, + ), + request=TripoConvertModelRequest( + original_model_task_id=original_model_task_id, + format=format, + quad=quad if quad else None, + face_limit=face_limit if face_limit != -1 else None, + texture_size=texture_size if texture_size != 4096 else None, + texture_format=texture_format if texture_format != "JPEG" else None + ), + auth_kwargs=kwargs, + ).execute() + return poll_until_finished(kwargs, response) + +NODE_CLASS_MAPPINGS = { + "TripoTextToModelNode": TripoTextToModelNode, + "TripoImageToModelNode": TripoImageToModelNode, + "TripoMultiviewToModelNode": TripoMultiviewToModelNode, + "TripoTextureNode": TripoTextureNode, + "TripoRefineNode": TripoRefineNode, + "TripoRigNode": TripoRigNode, + "TripoRetargetNode": TripoRetargetNode, + "TripoConversionNode": TripoConversionNode, +} + +NODE_DISPLAY_NAME_MAPPINGS = { + "TripoTextToModelNode": "Tripo: Text to Model", + "TripoImageToModelNode": "Tripo: Image to Model", + "TripoMultiviewToModelNode": "Tripo: Multiview to Model", + "TripoTextureNode": "Tripo: Texture model", + "TripoRefineNode": "Tripo: Refine Draft model", + "TripoRigNode": "Tripo: Rig model", + "TripoRetargetNode": "Tripo: Retarget rigged model", + "TripoConversionNode": "Tripo: Convert model", +} diff --git a/nodes.py b/nodes.py index 1e328651bb7..2d499051eb2 100644 --- a/nodes.py +++ b/nodes.py @@ -2281,6 +2281,10 @@ def init_builtin_api_nodes(): "nodes_pixverse.py", "nodes_stability.py", "nodes_pika.py", + "nodes_runway.py", + "nodes_tripo.py", + "nodes_rodin.py", + 
"nodes_gemini.py", ] if not load_custom_node(os.path.join(api_nodes_dir, "canary.py"), module_parent="comfy_api_nodes"): diff --git a/requirements.txt b/requirements.txt index 48631633d99..38991dbf9aa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -comfyui-frontend-package==1.20.5 -comfyui-workflow-templates==0.1.18 +comfyui-frontend-package==1.20.6 +comfyui-workflow-templates==0.1.20 torch torchsde torchvision