
Commit 64d231f

leejet and Green-Sky authored
feat: add flux support (leejet#356)
* add flux support
* avoid build failures in non-CUDA environments
* fix schnell support
* add k quants support
* add support for applying lora to quantized tensors
* add inplace conversion support for f8_e4m3 (leejet#359), in the same way it is done for bf16: like how bf16 converts losslessly to fp32, f8_e4m3 converts losslessly to fp16
* add xlabs flux comfy converted lora support
* update docs

---------

Co-authored-by: Erik Scholz <Green-Sky@users.noreply.github.com>
1 parent 697d000 commit 64d231f
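The f8_e4m3 bullet is the subtle one: E4M3 has a 4-bit exponent (bias 7) and a 3-bit mantissa, and fp16's wider 5-bit exponent (bias 15) and 10-bit mantissa can represent every finite E4M3 value exactly, just as fp32 can represent every bf16 value exactly. Below is a minimal C++ sketch of why the conversion is lossless; it is an illustration, not the commit's actual code, and the "inplace" aspect presumably mirrors the existing bf16 path (writing the wider outputs backwards through a buffer sized for the 16-bit result, so unread 8-bit inputs are never overwritten).

```cpp
#include <cstdint>

// Convert one f8_e4m3 byte (1 sign, 4 exponent, 3 mantissa bits, bias 7,
// no infinities, S.1111.111 = NaN) to an IEEE fp16 bit pattern
// (1 sign, 5 exponent, 10 mantissa bits, bias 15).
uint16_t f8_e4m3_to_f16_bits(uint8_t f8) {
    uint16_t sign = (uint16_t)(f8 & 0x80) << 8;  // sign bit moves to bit 15
    int      exp  = (f8 >> 3) & 0x0F;
    int      mant = f8 & 0x07;

    if (exp == 0x0F && mant == 0x07) {
        return sign | 0x7E00;  // E4M3FN NaN -> fp16 NaN
    }
    if (exp == 0) {
        if (mant == 0) {
            return sign;  // +/- zero
        }
        // E4M3 subnormal: value = mant * 2^-9. fp16's exponent range
        // reaches much lower, so normalize it into an fp16 normal number.
        exp = 1;
        while (!(mant & 0x08)) {
            mant <<= 1;
            exp--;
        }
        mant &= 0x07;  // drop the now-implicit leading 1
    }
    // Rebias the exponent from 7 to 15 and widen the mantissa from 3 to
    // 10 bits. No rounding is ever needed, which is what makes the
    // conversion lossless.
    return sign | (uint16_t)((exp + 8) << 10) | (uint16_t)(mant << 7);
}
```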

25 files changed: +1886 −172 lines

README.md

Lines changed: 5 additions & 4 deletions
@@ -12,11 +12,12 @@ Inference of [Stable Diffusion](https://github.com/CompVis/stable-diffusion) in
 - Super lightweight and without external dependencies
 - SD1.x, SD2.x, SDXL and SD3 support
   - !!!The VAE in SDXL encounters NaN issues under FP16, but unfortunately, the ggml_conv_2d only operates under FP16. Hence, a parameter is needed to specify the VAE that has fixed the FP16 NaN issue. You can find it here: [SDXL VAE FP16 Fix](https://huggingface.co/madebyollin/sdxl-vae-fp16-fix/blob/main/sdxl_vae.safetensors).
+- [Flux-dev/Flux-schnell Support](./docs/flux.md)
 - [SD-Turbo](https://huggingface.co/stabilityai/sd-turbo) and [SDXL-Turbo](https://huggingface.co/stabilityai/sdxl-turbo) support
 - [PhotoMaker](https://github.com/TencentARC/PhotoMaker) support.
 - 16-bit, 32-bit float support
-- 4-bit, 5-bit and 8-bit integer quantization support
+- 2-bit, 3-bit, 4-bit, 5-bit and 8-bit integer quantization support
 - Accelerated memory-efficient CPU inference
   - Only requires ~2.3GB when using txt2img with fp16 precision to generate a 512x512 image, enabling Flash Attention just requires ~1.8GB.
 - AVX, AVX2 and AVX512 support for x86 architectures
@@ -57,7 +58,6 @@ Inference of [Stable Diffusion](https://github.com/CompVis/stable-diffusion) in
   - The current implementation of ggml_conv_2d is slow and has high memory usage
 - [ ] Continuing to reduce memory usage (quantizing the weights of ggml_conv_2d)
 - [ ] Implement Inpainting support
-- [ ] k-quants support

 ## Usage

@@ -202,7 +202,7 @@ arguments:
   --normalize-input                  normalize PHOTOMAKER input id images
   --upscale-model [ESRGAN_PATH]      path to esrgan model. Upscale images after generate, just RealESRGAN_x4plus_anime_6B supported by now.
   --upscale-repeats                  Run the ESRGAN upscaler this many times (default 1)
-  --type [TYPE]                      weight type (f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0)
+  --type [TYPE]                      weight type (f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_k, q3_k, q4_k)
                                      If not specified, the default is the type of the weight file.
   --lora-model-dir [DIR]             lora model directory
   -i, --init-img [IMAGE]             path to the input image, required by img2img
@@ -229,7 +229,7 @@ arguments:
   --vae-tiling                       process vae in tiles to reduce memory usage
   --control-net-cpu                  keep controlnet in cpu (for low vram)
   --canny                            apply canny preprocessor (edge detection)
-  --color                            colors the logging tags according to level
+  --color                            Colors the logging tags according to level
   -v, --verbose                      print extra info
 ```

@@ -240,6 +240,7 @@ arguments:
 # ./bin/sd -m ../models/v1-5-pruned-emaonly.safetensors -p "a lovely cat"
 # ./bin/sd -m ../models/sd_xl_base_1.0.safetensors --vae ../models/sdxl_vae-fp16-fix.safetensors -H 1024 -W 1024 -p "a lovely cat" -v
 # ./bin/sd -m ../models/sd3_medium_incl_clips_t5xxlfp16.safetensors -H 1024 -W 1024 -p 'a lovely cat holding a sign says \"Stable Diffusion CPP\"' --cfg-scale 4.5 --sampling-method euler -v
+# ./bin/sd --diffusion-model ../models/flux1-dev-q3_k.gguf --vae ../models/ae.sft --clip_l ../models/clip_l.safetensors --t5xxl ../models/t5xxl_fp16.safetensors -p "a lovely cat holding a sign says 'flux.cpp'" --cfg-scale 1.0 --sampling-method euler -v
 ```

 Using formats of different precisions will yield results of varying quality.

assets/flux/flux1-dev-q2_k.png (416 KB)

assets/flux/flux1-dev-q3_k.png (490 KB)

assets/flux/flux1-dev-q4_0.png (464 KB)

(unnamed image asset, 566 KB)

assets/flux/flux1-dev-q8_0.png (475 KB)

assets/flux/flux1-schnell-q8_0.png (481 KB)

common.hpp

Lines changed: 1 addition & 1 deletion
@@ -367,7 +367,7 @@ class SpatialTransformer : public GGMLBlock {
     int64_t n_head;
     int64_t d_head;
     int64_t depth       = 1;    // 1
-    int64_t context_dim = 768;  // hidden_size, 1024 for VERSION_2_x
+    int64_t context_dim = 768;  // hidden_size, 1024 for VERSION_SD2

 public:
     SpatialTransformer(int64_t in_channels,
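This one-line change is part of a commit-wide rename of the SDVersion constants (VERSION_1_x to VERSION_SD1, VERSION_2_x to VERSION_SD2, VERSION_XL to VERSION_SDXL), visible in bulk in control.hpp below, presumably to make room for the new flux variants. As a hedged sketch, the renamed enum plausibly looks like this after the commit; only the names that actually appear in these diffs are certain, the rest are assumptions inferred from the commit title and the project's existing SD3 support:

```cpp
// Plausible post-rename version enum. Only the first four names appear in
// the diffs on this page; the remaining entries are assumptions based on
// the commit title ("add flux support") and the README's SD3 support.
enum SDVersion {
    VERSION_SD1,           // was VERSION_1_x
    VERSION_SD2,           // was VERSION_2_x
    VERSION_SDXL,          // was VERSION_XL
    VERSION_SVD,           // unchanged
    VERSION_SD3,           // assumption
    VERSION_FLUX_DEV,      // assumption
    VERSION_FLUX_SCHNELL,  // assumption
};
```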

conditioner.hpp

Lines changed: 241 additions & 15 deletions
Large diffs are not rendered by default.

control.hpp

Lines changed: 9 additions & 9 deletions
@@ -14,7 +14,7 @@
 */
 class ControlNetBlock : public GGMLBlock {
 protected:
-    SDVersion version = VERSION_1_x;
+    SDVersion version = VERSION_SD1;
     // network hparams
     int in_channels  = 4;
     int out_channels = 4;
@@ -26,19 +26,19 @@ class ControlNetBlock : public GGMLBlock {
     int time_embed_dim    = 1280;  // model_channels*4
     int num_heads         = 8;
     int num_head_channels = -1;    // channels // num_heads
-    int context_dim       = 768;   // 1024 for VERSION_2_x, 2048 for VERSION_XL
+    int context_dim       = 768;   // 1024 for VERSION_SD2, 2048 for VERSION_SDXL

 public:
     int model_channels  = 320;
-    int adm_in_channels = 2816;  // only for VERSION_XL
+    int adm_in_channels = 2816;  // only for VERSION_SDXL

-    ControlNetBlock(SDVersion version = VERSION_1_x)
+    ControlNetBlock(SDVersion version = VERSION_SD1)
         : version(version) {
-        if (version == VERSION_2_x) {
+        if (version == VERSION_SD2) {
             context_dim       = 1024;
             num_head_channels = 64;
             num_heads         = -1;
-        } else if (version == VERSION_XL) {
+        } else if (version == VERSION_SDXL) {
             context_dim           = 2048;
             attention_resolutions = {4, 2};
             channel_mult          = {1, 2, 4};
@@ -58,7 +58,7 @@ class ControlNetBlock : public GGMLBlock {
         // time_embed_1 is nn.SiLU()
         blocks["time_embed.2"] = std::shared_ptr<GGMLBlock>(new Linear(time_embed_dim, time_embed_dim));

-        if (version == VERSION_XL || version == VERSION_SVD) {
+        if (version == VERSION_SDXL || version == VERSION_SVD) {
             blocks["label_emb.0.0"] = std::shared_ptr<GGMLBlock>(new Linear(adm_in_channels, time_embed_dim));
             // label_emb_1 is nn.SiLU()
             blocks["label_emb.0.2"] = std::shared_ptr<GGMLBlock>(new Linear(time_embed_dim, time_embed_dim));
@@ -307,7 +307,7 @@ class ControlNetBlock : public GGMLBlock {
 };

 struct ControlNet : public GGMLRunner {
-    SDVersion version = VERSION_1_x;
+    SDVersion version = VERSION_SD1;
     ControlNetBlock control_net;

     ggml_backend_buffer_t control_buffer = NULL;  // keep control output tensors in backend memory
@@ -318,7 +318,7 @@ struct ControlNet : public GGMLRunner {

     ControlNet(ggml_backend_t backend,
                ggml_type wtype,
-               SDVersion version = VERSION_1_x)
+               SDVersion version = VERSION_SD1)
         : GGMLRunner(backend, wtype), control_net(version) {
         control_net.init(params_ctx, wtype);
     }
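Closing the loop on the commit message bullet "add support for applying lora to quantized tensors": a LoRA delta cannot be added directly into quantized blocks, so the natural flow is dequantize, merge in f32, requantize. The sketch below illustrates that flow with hypothetical converter callbacks standing in for ggml's per-type dequantize/quantize routines; it is a conceptual outline, not the code from this commit.

```cpp
#include <functional>
#include <vector>

// Hypothetical per-type converters; ggml provides equivalents per ggml_type.
using dequant_fn = std::function<void(const void* src, float* dst, int n)>;
using quant_fn   = std::function<void(const float* src, void* dst, int n)>;

// Merge a precomputed LoRA update into a quantized weight tensor, in place.
void apply_lora_to_quantized(void* weight, int n,
                             const float* delta,  // scale * (up * down), same shape as weight
                             dequant_fn dequantize,
                             quant_fn quantize) {
    std::vector<float> w(n);
    dequantize(weight, w.data(), n);  // quantized blocks -> f32
    for (int i = 0; i < n; i++) {
        w[i] += delta[i];             // apply the LoRA delta at full precision
    }
    quantize(w.data(), weight, n);    // f32 -> quantized blocks (requantize)
}
```

The trade-off in any scheme like this is one extra quantization round-trip per patched tensor, which can perturb even the weights the LoRA did not touch; doing the merge once at load time keeps that a one-time cost.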
