
Commit 96bd162

Add cc-by-nc-4.0 license for metaclip, make note in quickgelu model def about pretrained_cfg mapping
1 parent 6894ec7 commit 96bd162

File tree

1 file changed: +16 -6 lines changed


timm/models/vision_transformer.py

Lines changed: 16 additions & 6 deletions
@@ -1473,18 +1473,22 @@ def _cfg(url='', **kwargs):
     'vit_base_patch32_clip_224.metaclip_2pt5b': _cfg(
         hf_hub_id='facebook/metaclip-b32-fullcc2.5b',
         hf_hub_filename='metaclip_b32_fullcc2.5b.bin',
+        license='cc-by-nc-4.0',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=512),
     'vit_base_patch16_clip_224.metaclip_2pt5b': _cfg(
         hf_hub_id='facebook/metaclip-b16-fullcc2.5b',
         hf_hub_filename='metaclip_b16_fullcc2.5b.bin',
+        license='cc-by-nc-4.0',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=512),
     'vit_large_patch14_clip_224.metaclip_2pt5b': _cfg(
         hf_hub_id='facebook/metaclip-l14-fullcc2.5b',
         hf_hub_filename='metaclip_l14_fullcc2.5b.bin',
+        license='cc-by-nc-4.0',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=768),
     'vit_huge_patch14_clip_224.metaclip_2pt5b': _cfg(
         hf_hub_id='facebook/metaclip-h14-fullcc2.5b',
         hf_hub_filename='metaclip_h14_fullcc2.5b.bin',
+        license='cc-by-nc-4.0',
         mean=OPENAI_CLIP_MEAN, std=OPENAI_CLIP_STD, crop_pct=1.0, num_classes=1024),
 
     'vit_base_patch32_clip_224.openai': _cfg(
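
The license field added above is plain metadata on each pretrained cfg, so it can be checked programmatically before pulling weights. A minimal sketch, assuming a timm install that already contains these MetaCLIP entries and exports the pretrained-cfg helpers used here (get_pretrained_cfg, get_pretrained_cfg_value):

import timm

# Look up the registered cfg for one of the MetaCLIP weight tags touched in this commit.
cfg = timm.get_pretrained_cfg('vit_base_patch16_clip_224.metaclip_2pt5b')
print(cfg.hf_hub_id)  # 'facebook/metaclip-b16-fullcc2.5b'
print(cfg.license)    # 'cc-by-nc-4.0' after this change

# Same value via the key-based accessor.
print(timm.get_pretrained_cfg_value('vit_base_patch16_clip_224.metaclip_2pt5b', 'license'))
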
@@ -2129,7 +2133,8 @@ def vit_base_patch32_clip_quickgelu_224(pretrained=False, **kwargs) -> VisionTransformer:
         patch_size=32, embed_dim=768, depth=12, num_heads=12, pre_norm=True,
         norm_layer=nn.LayerNorm, act_layer='quick_gelu')
     model = _create_vision_transformer(
-        'vit_base_patch32_clip_224', pretrained=pretrained, **dict(model_args, **kwargs))
+        'vit_base_patch32_clip_224',  # map to non quickgelu pretrained_cfg intentionally
+        pretrained=pretrained, **dict(model_args, **kwargs))
     return model
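
For context on the act_layer='quick_gelu' setting in these model defs: QuickGELU is the sigmoid-based GELU approximation the original OpenAI CLIP weights were trained with, and timm resolves the 'quick_gelu' string to its own layer. A roughly equivalent standalone sketch, for illustration only:

import torch
import torch.nn as nn

class QuickGELU(nn.Module):
    # GELU approximation used by OpenAI CLIP: x * sigmoid(1.702 * x).
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x * torch.sigmoid(1.702 * x)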

@@ -2141,7 +2146,8 @@ def vit_base_patch16_clip_quickgelu_224(pretrained=False, **kwargs) -> VisionTransformer:
         patch_size=16, embed_dim=768, depth=12, num_heads=12, pre_norm=True,
         norm_layer=nn.LayerNorm, act_layer='quick_gelu')
     model = _create_vision_transformer(
-        'vit_base_patch16_clip_224', pretrained=pretrained, **dict(model_args, **kwargs))
+        'vit_base_patch16_clip_224',  # map to non quickgelu pretrained_cfg intentionally
+        pretrained=pretrained, **dict(model_args, **kwargs))
     return model

@@ -2154,7 +2160,8 @@ def vit_large_patch14_clip_quickgelu_224(pretrained=False, **kwargs) -> VisionTransformer:
         patch_size=14, embed_dim=1024, depth=24, num_heads=16, pre_norm=True,
         norm_layer=nn.LayerNorm, act_layer='quick_gelu')
     model = _create_vision_transformer(
-        'vit_large_patch14_clip_224', pretrained=pretrained, **dict(model_args, **kwargs))
+        'vit_large_patch14_clip_224',  # map to non quickgelu pretrained_cfg intentionally
+        pretrained=pretrained, **dict(model_args, **kwargs))
     return model

@@ -2166,7 +2173,8 @@ def vit_large_patch14_clip_quickgelu_336(pretrained=False, **kwargs) -> VisionTransformer:
         patch_size=14, embed_dim=1024, depth=24, num_heads=16, pre_norm=True,
         norm_layer=nn.LayerNorm, act_layer='quick_gelu')
     model = _create_vision_transformer(
-        'vit_large_patch14_clip_336', pretrained=pretrained, **dict(model_args, **kwargs))
+        'vit_large_patch14_clip_336',  # map to non quickgelu pretrained_cfg intentionally
+        pretrained=pretrained, **dict(model_args, **kwargs))
     return model

@@ -2178,7 +2186,8 @@ def vit_huge_patch14_clip_quickgelu_224(pretrained=False, **kwargs) -> VisionTransformer:
         patch_size=14, embed_dim=1280, depth=32, num_heads=16, pre_norm=True,
         norm_layer=nn.LayerNorm, act_layer='quick_gelu')
     model = _create_vision_transformer(
-        'vit_huge_patch14_clip_224', pretrained=pretrained, **dict(model_args, **kwargs))
+        'vit_huge_patch14_clip_224',  # map to non quickgelu pretrained_cfg intentionally
+        pretrained=pretrained, **dict(model_args, **kwargs))
     return model

@@ -2190,7 +2199,8 @@ def vit_huge_patch14_clip_quickgelu_378(pretrained=False, **kwargs) -> VisionTransformer:
         patch_size=14, embed_dim=1280, depth=32, num_heads=16, pre_norm=True,
         norm_layer=nn.LayerNorm, act_layer='quick_gelu')
     model = _create_vision_transformer(
-        'vit_huge_patch14_clip_378', pretrained=pretrained, **dict(model_args, **kwargs))
+        'vit_huge_patch14_clip_378',  # map to non quickgelu pretrained_cfg intentionally
+        pretrained=pretrained, **dict(model_args, **kwargs))
     return model
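
The comment added in each of these hunks records that the mapping is deliberate: every quickgelu entrypoint passes the non-quickgelu variant name to _create_vision_transformer, so pretrained weight tags resolve against the cfgs registered under that name. A usage sketch against the code as of this commit (tag availability and cfg layout may differ in later timm releases):

import timm

# Builds the quick_gelu version of the architecture, but the 'openai' tag is resolved
# through 'vit_base_patch32_clip_224.openai', the non-quickgelu cfg name, which is the
# intentional mapping noted above. Set pretrained=True to actually download the weights.
model = timm.create_model('vit_base_patch32_clip_quickgelu_224.openai', pretrained=False)
print(model.pretrained_cfg.get('architecture'))  # expected here: 'vit_base_patch32_clip_224'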
