Commit 8557894

support clip-vit-large-patch14-336 (comfyanonymous#4042)
* support clip-vit-large-patch14-336
1 parent 6f7869f · commit 8557894

File tree

2 files changed: +22 -1 lines changed


comfy/clip_vision.py (+4 -1)
@@ -94,7 +94,10 @@ def load_clipvision_from_sd(sd, prefix="", convert_keys=False):
     elif "vision_model.encoder.layers.30.layer_norm1.weight" in sd:
         json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_h.json")
     elif "vision_model.encoder.layers.22.layer_norm1.weight" in sd:
-        json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_vitl.json")
+        if sd["vision_model.embeddings.position_embedding.weight"].shape[0] == 577:
+            json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_vitl_336.json")
+        else:
+            json_config = os.path.join(os.path.dirname(os.path.realpath(__file__)), "clip_vision_config_vitl.json")
     else:
         return None
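Why the check keys on 577: a CLIP ViT position-embedding table has one row per image patch plus one for the class token, so its length pins down the input resolution. At 336 px with 14-px patches that is (336/14)² + 1 = 24² + 1 = 577, while the standard 224-px ViT-L checkpoint has (224/14)² + 1 = 257, which is why the embedding shape alone distinguishes the two checkpoints. A minimal sketch of that arithmetic (illustrative only, not part of the commit):

def expected_positions(image_size: int, patch_size: int) -> int:
    # one position per image patch, plus one for the class (CLS) token
    patches_per_side = image_size // patch_size
    return patches_per_side * patches_per_side + 1

assert expected_positions(336, 14) == 577  # clip-vit-large-patch14-336
assert expected_positions(224, 14) == 257  # standard clip-vit-large-patch14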

comfy/clip_vision_config_vitl_336.json (+18)

@@ -0,0 +1,18 @@
+{
+  "attention_dropout": 0.0,
+  "dropout": 0.0,
+  "hidden_act": "quick_gelu",
+  "hidden_size": 1024,
+  "image_size": 336,
+  "initializer_factor": 1.0,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-5,
+  "model_type": "clip_vision_model",
+  "num_attention_heads": 16,
+  "num_channels": 3,
+  "num_hidden_layers": 24,
+  "patch_size": 14,
+  "projection_dim": 768,
+  "torch_dtype": "float32"
+}
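These fields mirror Hugging Face's CLIPVisionConfig layout for openai/clip-vit-large-patch14-336; ComfyUI reads the JSON with its own model code, so transformers is not required by this commit. As a hedged, illustrative sanity check only, assuming the transformers package is installed:

from transformers import CLIPVisionConfig

# Illustrative cross-check; ComfyUI does not use transformers to load this file.
cfg = CLIPVisionConfig.from_json_file("comfy/clip_vision_config_vitl_336.json")
positions = (cfg.image_size // cfg.patch_size) ** 2 + 1
print(positions)  # 577, the value load_clipvision_from_sd keys on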
