@@ -63,15 +63,15 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
63
63
: version(version), tokenizer(version == VERSION_SD2 ? 0 : 49407 ), embd_dir(embd_dir), wtype(wtype) {
64
64
if (clip_skip <= 0 ) {
65
65
clip_skip = 1 ;
66
- if (version == VERSION_SD2 || version == VERSION_SDXL_BASE || version == VERSION_SDXL_REFINER) {
66
+ if (version == VERSION_SD2 || version == VERSION_SDXL || version == VERSION_SDXL_REFINER) {
67
67
clip_skip = 2 ;
68
68
}
69
69
}
70
70
if (version == VERSION_SD1) {
71
71
text_model = std::make_shared<CLIPTextModelRunner>(backend, wtype, OPENAI_CLIP_VIT_L_14, clip_skip);
72
72
} else if (version == VERSION_SD2) {
73
73
text_model = std::make_shared<CLIPTextModelRunner>(backend, wtype, OPEN_CLIP_VIT_H_14, clip_skip);
74
- } else if (version == VERSION_SDXL_BASE ) {
74
+ } else if (version == VERSION_SDXL ) {
75
75
text_model = std::make_shared<CLIPTextModelRunner>(backend, wtype, OPENAI_CLIP_VIT_L_14, clip_skip, false );
76
76
text_model2 = std::make_shared<CLIPTextModelRunner>(backend, wtype, OPEN_CLIP_VIT_BIGG_14, clip_skip, false );
77
77
} else if (version == VERSION_SDXL_REFINER) {
@@ -83,7 +83,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
83
83
if (version != VERSION_SDXL_REFINER) {
84
84
text_model->set_clip_skip (clip_skip);
85
85
}
86
- if (version == VERSION_SDXL_BASE || version == VERSION_SDXL_REFINER) {
86
+ if (version == VERSION_SDXL || version == VERSION_SDXL_REFINER) {
87
87
text_model2->set_clip_skip (clip_skip);
88
88
}
89
89
}
@@ -92,7 +92,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
92
92
if (version != VERSION_SDXL_REFINER) {
93
93
text_model->get_param_tensors (tensors, " cond_stage_model.transformer.text_model" );
94
94
}
95
- if (version == VERSION_SDXL_BASE || version == VERSION_SDXL_REFINER) {
95
+ if (version == VERSION_SDXL || version == VERSION_SDXL_REFINER) {
96
96
text_model2->get_param_tensors (tensors, " cond_stage_model.1.transformer.text_model" );
97
97
}
98
98
}
@@ -101,7 +101,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
101
101
if (version != VERSION_SDXL_REFINER) {
102
102
text_model->alloc_params_buffer ();
103
103
}
104
- if (version == VERSION_SDXL_BASE || version == VERSION_SDXL_REFINER) {
104
+ if (version == VERSION_SDXL || version == VERSION_SDXL_REFINER) {
105
105
text_model2->alloc_params_buffer ();
106
106
}
107
107
}
@@ -110,7 +110,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
110
110
if (version != VERSION_SDXL_REFINER) {
111
111
text_model->free_params_buffer ();
112
112
}
113
- if (version == VERSION_SDXL_BASE || version == VERSION_SDXL_REFINER) {
113
+ if (version == VERSION_SDXL || version == VERSION_SDXL_REFINER) {
114
114
text_model2->free_params_buffer ();
115
115
}
116
116
}
@@ -120,7 +120,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
120
120
if (version != VERSION_SDXL_REFINER) {
121
121
buffer_size = text_model->get_params_buffer_size ();
122
122
}
123
- if (version == VERSION_SDXL_BASE || version == VERSION_SDXL_REFINER) {
123
+ if (version == VERSION_SDXL || version == VERSION_SDXL_REFINER) {
124
124
buffer_size += text_model2->get_params_buffer_size ();
125
125
}
126
126
return buffer_size;
@@ -411,7 +411,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
411
411
auto input_ids = vector_to_ggml_tensor_i32 (work_ctx, chunk_tokens);
412
412
struct ggml_tensor * input_ids2 = NULL ;
413
413
size_t max_token_idx = 0 ;
414
- if (version == VERSION_SDXL_BASE || version == VERSION_SDXL_REFINER) {
414
+ if (version == VERSION_SDXL || version == VERSION_SDXL_REFINER) {
415
415
auto it = std::find (chunk_tokens.begin (), chunk_tokens.end (), tokenizer.EOS_TOKEN_ID );
416
416
if (it != chunk_tokens.end ()) {
417
417
std::fill (std::next (it), chunk_tokens.end (), 0 );
@@ -438,7 +438,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
438
438
&chunk_hidden_states1,
439
439
work_ctx);
440
440
}
441
- if (version == VERSION_SDXL_BASE || version == VERSION_SDXL_REFINER) {
441
+ if (version == VERSION_SDXL || version == VERSION_SDXL_REFINER) {
442
442
text_model2->compute (n_threads,
443
443
input_ids2,
444
444
0 ,
@@ -497,7 +497,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public Conditioner {
497
497
ggml_nelements (hidden_states) / chunk_hidden_states->ne [0 ]);
498
498
499
499
ggml_tensor* vec = NULL ;
500
- if (version == VERSION_SDXL_BASE || version == VERSION_SDXL_REFINER) {
500
+ if (version == VERSION_SDXL || version == VERSION_SDXL_REFINER) {
501
501
int out_dim = 256 ;
502
502
vec = ggml_new_tensor_1d (work_ctx, GGML_TYPE_F32, adm_in_channels);
503
503
// [0:1280]
0 commit comments