diff --git a/.spi.yml b/.spi.yml new file mode 100644 index 0000000..1891e8f --- /dev/null +++ b/.spi.yml @@ -0,0 +1,4 @@ +version: 1 +builder: + configs: + - documentation_targets: [coreml-stable-diffusion-swift] diff --git a/LICENSE b/LICENSE index b2bea10..a13e43d 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2023 Igor +Copyright (c) 2023 Igor Shelopaev Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 3ec9c06..3a707a1 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,12 @@ # CoreML stable diffusion image generation -[![](https://img.shields.io/endpoint?url=https%3A%2F%2Fswiftpackageindex.com%2Fapi%2Fpackages%2FThe-Igor%2Fcoreml-stable-diffusion-swift%2Fbadge%3Ftype%3Dswift-versions)](https://swiftpackageindex.com/The-Igor/coreml-stable-diffusion-swift) -[![](https://img.shields.io/endpoint?url=https%3A%2F%2Fswiftpackageindex.com%2Fapi%2Fpackages%2FThe-Igor%2Fcoreml-stable-diffusion-swift%2Fbadge%3Ftype%3Dplatforms)](https://swiftpackageindex.com/The-Igor/coreml-stable-diffusion-swift) +[![](https://img.shields.io/endpoint?url=https%3A%2F%2Fswiftpackageindex.com%2Fapi%2Fpackages%2Fswiftuiux%2Fcoreml-stable-diffusion-swift%2Fbadge%3Ftype%3Dplatforms)](https://swiftpackageindex.com/swiftuiux/coreml-stable-diffusion-swift) -The package is a mediator between [Apple's Core ML Stable Diffusion implementation](https://github.com/apple/ml-stable-diffusion) and your app that let you run text-to-image or image-to-image models +The package is a mediator between Apple's Core ML Stable Diffusion implementation and your app that lets you run text-to-image or image-to-image models + +## [SwiftUI example](https://github.com/swiftuiux/coreml-stable-diffusion-swift-example) + +## 
[Documentation(API)](https://swiftpackageindex.com/swiftuiux/coreml-stable-diffusion-swift/main/documentation/coreml_stable_diffusion_swift) ## How to use the package ### 1. Create GenerativeManager @@ -24,13 +27,25 @@ The package is a mediator between [Apple's Core ML Stable Diffusion implementati The speed can be unpredictable. Sometimes a model will suddenly run a lot slower than before. It appears as if Core ML is trying to be smart in how it schedules things, but doesn’t always optimal. -## SwiftUI example -[CoreML stable diffusion image generation](https://github.com/The-Igor/coreml-stable-diffusion-swift-example) - ![The concept](https://github.com/The-Igor/coreml-stable-diffusion-swift-example/blob/main/img/img_01.png) + ![The concept](https://github.com/swiftuiux/coreml-stable-diffusion-swift-example/blob/main/img/img_01.png) + + ![The concept](https://github.com/swiftuiux/coreml-stable-diffusion-swift-example/blob/main/img/img_03.png) - ![The concept](https://github.com/The-Igor/coreml-stable-diffusion-swift-example/blob/main/img/img_03.png) +### Typical set of files for a model and the purpose of each file + +| File Name | Description | +|--------------------------------------|------------------------------------------------------------------| +| `TextEncoder.mlmodelc` | Encodes input text into a vector space for further processing. | +| `Unet.mlmodelc` | Core model handling the transformation of encoded vectors into intermediate image representations. | +| `UnetChunk1.mlmodelc` | First segment of a segmented U-Net model for optimized processing in environments with memory constraints. | +| `UnetChunk2.mlmodelc` | Second segment of the segmented U-Net model, completing the tasks started by the first chunk. | +| `VAEDecoder.mlmodelc` | Decodes the latent representations into final image outputs. | +| `VAEEncoder.mlmodelc` | Compresses input image data into a latent space for reconstruction or further processing. 
| +| `SafetyChecker.mlmodelc` | Ensures generated content adheres to safety guidelines by checking against predefined criteria. | +| `vocab.json` | Contains the vocabulary used by the text encoder for tokenization and encoding processes. | +| `merges.txt` | Stores the merging rules for byte-pair encoding used in the text encoder. | ## Documentation(API) - You need to have Xcode 13 installed in order to have access to Documentation Compiler (DocC) diff --git a/Sources/coreml-stable-diffusion-swift/ext/NSImage+.swift b/Sources/coreml-stable-diffusion-swift/ext/NSImage+.swift index 2c1a70c..5332c7a 100644 --- a/Sources/coreml-stable-diffusion-swift/ext/NSImage+.swift +++ b/Sources/coreml-stable-diffusion-swift/ext/NSImage+.swift @@ -29,7 +29,6 @@ extension NSImage { } } -/// https://gist.github.com/MaciejGad/11d8469b218817290ee77012edb46608 @available(macOS 13.1, *) extension NSImage { @@ -52,97 +51,58 @@ extension NSImage { return nil } - /// Copies the current image and resizes it to the given size. - /// - /// - parameter size: The size of the new image. - /// - /// - returns: The resized copy of the given image. - func copy(size: NSSize) -> NSImage? { - // Create a new rect with given width and height - let frame = NSMakeRect(0, 0, size.width, size.height) + /// Resizes and crops the image to the specified size. + /// - Parameter innerSize: The target size to resize and crop the image to. + /// - Returns: A new `NSImage` object that is resized and cropped to the specified size, or `nil` if the operation fails. + func resizeAndCrop(to innerSize: NSSize) -> NSImage? { + let aspectWidth = innerSize.width / self.size.width + let aspectHeight = innerSize.height / self.size.height + let aspectRatio = max(aspectWidth, aspectHeight) + + let aspectFillSize = NSSize(width: self.size.width * aspectRatio, height: self.size.height * aspectRatio) - // Get the best representation for the given size. 
- guard let rep = self.bestRepresentation(for: frame, context: nil, hints: nil) else { + guard let resizedImage = self.resize(to: aspectFillSize) else { return nil } - // Create an empty image with the given size. - let img = NSImage(size: size) + let croppedImage = resizedImage.crop(to: innerSize) - // Set the drawing context and make sure to remove the focus before returning. - img.lockFocus() - defer { img.unlockFocus() } - - // Draw the new image - if rep.draw(in: frame) { - return img - } - - // Return nil in case something went wrong. - return nil + return croppedImage } - /// Copies the current image and resizes it to the size of the given NSSize, while - /// maintaining the aspect ratio of the original image. - /// - /// - parameter size: The size of the new image. - /// - /// - returns: The resized copy of the given image. - func resizeWhileMaintainingAspectRatioToSize(size: NSSize) -> NSImage? { - let newSize: NSSize - - let widthRatio = size.width / self.width - let heightRatio = size.height / self.height - - if widthRatio > heightRatio { - newSize = NSSize(width: floor(self.width * widthRatio), height: floor(self.height * widthRatio)) - } else { - newSize = NSSize(width: floor(self.width * heightRatio), height: floor(self.height * heightRatio)) - } - - return self.copy(size: newSize) + /// Resizes the image to the specified size while maintaining the aspect ratio. + /// - Parameter size: The target size to resize the image to. + /// - Returns: A new `NSImage` object that is resized to the specified size, or `nil` if the operation fails. + func resize(to size: NSSize) -> NSImage? { + let newImage = NSImage(size: size) + newImage.lockFocus() + self.draw(in: NSRect(origin: .zero, size: size), + from: NSRect(origin: .zero, size: self.size), + operation: .copy, + fraction: 1.0) + newImage.unlockFocus() + return newImage } - /// Copies and crops an image to the supplied size. - /// - /// - parameter size: The size of the new image. 
- /// - /// - returns: The cropped copy of the given image. - func crop(size: NSSize) -> NSImage? { - // Resize the current image, while preserving the aspect ratio. - guard let resized = self.resizeWhileMaintainingAspectRatioToSize(size: size) else { + /// Crops the image to the specified size. + /// - Parameter size: The target size to crop the image to. + /// - Returns: A new `NSImage` object that is cropped to the specified size, or `nil` if the operation fails. + func crop(to size: NSSize) -> NSImage? { + guard let cgImage = self.cgImage(forProposedRect: nil, context: nil, hints: nil) else { return nil } - // Get some points to center the cropping area. - let x = floor((resized.width - size.width) / 2) - let y = floor((resized.height - size.height) / 2) - // Create the cropping frame. - let frame = NSMakeRect(x, y, size.width, size.height) + let croppingRect = CGRect(x: (cgImage.width - Int(size.width)) / 2, + y: (cgImage.height - Int(size.height)) / 2, + width: Int(size.width), + height: Int(size.height)) - // Get the best representation of the image for the given cropping frame. - guard let rep = resized.bestRepresentation(for: frame, context: nil, hints: nil) else { + guard let croppedCGImage = cgImage.cropping(to: croppingRect) else { return nil } - // Create a new image with the new size - let img = NSImage(size: size) - - img.lockFocus() - defer { img.unlockFocus() } - - if rep.draw(in: NSMakeRect(0, 0, size.width, size.height), - from: frame, - operation: NSCompositingOperation.copy, - fraction: 1.0, - respectFlipped: false, - hints: [:]) { - // Return the cropped image. - return img - } - - // Return nil in case anything fails. - return nil + let croppedImage = NSImage(cgImage: croppedCGImage, size: size) + return croppedImage } /// Saves the PNG representation of the current image to the HD. 
@@ -156,4 +116,3 @@ extension NSImage { } #endif - diff --git a/Sources/coreml-stable-diffusion-swift/helper/HelperImage.swift b/Sources/coreml-stable-diffusion-swift/helper/HelperImage.swift index 78c4725..7e21e12 100644 --- a/Sources/coreml-stable-diffusion-swift/helper/HelperImage.swift +++ b/Sources/coreml-stable-diffusion-swift/helper/HelperImage.swift @@ -28,11 +28,11 @@ public func getNSImage(from data : Data?, cropped toSize : NSSize? = nil) async return nil } - guard let size = toSize else{ + guard let size = toSize ?? adjustSize(for: nsImage) else{ return nsImage } - guard let cropped = nsImage.crop(size: size)else{ + guard let cropped = nsImage.resizeAndCrop(to: size)else{ return nil } @@ -54,4 +54,30 @@ public func getImage(cgImage : CGImage?) -> Image?{ return Image(nsImage: nsImage) } +/// Determines the largest square size from a predefined set of sizes that can fit within the given image's dimensions. +/// - Parameter image: The input `NSImage` to be adjusted. +/// - Returns: An `NSSize` representing the largest possible square size that fits within the image's dimensions, or `nil` if the image is too small. +@available(macOS 13.1, *) +func adjustSize(for image: NSImage) -> NSSize? 
{ + let predefinedSizes: [CGFloat] = [512] + + let width = image.size.width + let height = image.size.height + + guard width >= 256, height >= 256 else { + return nil + } + + let minDimension = min(width, height) + + // Find the largest predefined size that can fit within the image dimensions + for size in predefinedSizes.reversed() { + if minDimension >= size { + return NSSize(width: size, height: size) + } + } + + return nil +} + #endif diff --git a/Sources/coreml-stable-diffusion-swift/helper/HelperPipeline.swift b/Sources/coreml-stable-diffusion-swift/helper/HelperPipeline.swift index f4b516d..736fc28 100644 --- a/Sources/coreml-stable-diffusion-swift/helper/HelperPipeline.swift +++ b/Sources/coreml-stable-diffusion-swift/helper/HelperPipeline.swift @@ -9,29 +9,38 @@ import Foundation import StableDiffusion import CoreML -/// Get diffusion pipeline +/// Retrieves a StableDiffusionPipeline configured with the specified parameters. +/// /// - Parameters: -/// - url: The path to the models files -/// - disableSafety: Disable safety check -/// - reduceMemory: Reduce memory if you use it on devises with limited memory amount -/// - computeUnits: The set of processing-unit configurations the model can use to make predictions -/// - Throws: Errors while creating pipeline -/// - Returns: A pipeline used to generate image samples from text input using stable diffusion +/// - url: The URL where the pipeline resources are located. +/// - disableSafety: A boolean flag to disable safety checks. Default is `false`. +/// - reduceMemory: A boolean flag to reduce memory usage. Default is `false`. +/// - computeUnits: The compute units to be used by the pipeline. Default is `.cpuAndGPU`. +/// +/// - Throws: An error if the pipeline initialization fails. +/// +/// - Returns: A configured instance of `StableDiffusionPipeline`. 
+ @available(iOS 16.2, macOS 13.1, *) public func getDiffusionPipeline( for url : URL, + _ controlNet: [String] = [], _ disableSafety : Bool = false, _ reduceMemory : Bool = false, - _ computeUnits : MLComputeUnits = .cpuAndNeuralEngine -) throws -> StableDiffusionPipeline{ + _ computeUnits : MLComputeUnits = .cpuAndGPU +) throws -> StableDiffusionPipeline { + // Initialize the MLModelConfiguration with the specified compute units. let config: MLModelConfiguration = .init() config.computeUnits = computeUnits + // Return a new StableDiffusionPipeline instance, using the provided URL for resources. + // The pipeline is configured with optional parameters to disable safety checks and reduce memory usage. return try .init( - resourcesAt: url, controlNet: [], - configuration : config, - disableSafety: disableSafety, - reduceMemory : reduceMemory + resourcesAt: url, // The URL where the pipeline resources are located. + controlNet: controlNet, // The controlNet model names forwarded to the pipeline; empty by default when no control network is used. + configuration: config, // The MLModelConfiguration object with the compute units setting. + disableSafety: disableSafety, // A boolean flag to disable safety checks if set to true. + reduceMemory: reduceMemory // A boolean flag to reduce memory usage if set to true. ) }
{ try pipeline.generateImages(configuration: config) { progress in - #if DEBUG logger.info("Progress step: \(progress.step)") - #endif return !Task.isCancelled } } diff --git a/Sources/coreml-stable-diffusion-swift/model/GenerativeModel.swift b/Sources/coreml-stable-diffusion-swift/model/GenerativeModel.swift index 773333f..e211357 100644 --- a/Sources/coreml-stable-diffusion-swift/model/GenerativeModel.swift +++ b/Sources/coreml-stable-diffusion-swift/model/GenerativeModel.swift @@ -6,17 +6,12 @@ // import Foundation +import StableDiffusion /// Model profile description @available(iOS 16.2, macOS 13.1, *) public struct GenerativeModel: Hashable, Identifiable{ - - // MARK: - Life circle - public init(url: URL? = nil, name: String) { - self.url = url - self.name = name - } - + /// Identifier public let id = UUID() @@ -26,4 +21,11 @@ public struct GenerativeModel: Hashable, Identifiable{ /// Name of the model public let name: String + // MARK: - Life cycle + public init(url: URL? = nil, name: String) { + self.url = url + self.name = name + } } + +