@@ -1127,19 +1127,24 @@ def _prepare_output_docstrings(output_type, config_class, min_indent=None):

     ```python
     >>> from transformers import {processor_class}, {model_class}
+    >>> import torch
     >>> from datasets import load_dataset

     >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
+    >>> dataset = dataset.sort("id")
     >>> sampling_rate = dataset.features["audio"].sampling_rate

     >>> processor = {processor_class}.from_pretrained("{checkpoint}")
     >>> model = {model_class}.from_pretrained("{checkpoint}")

     >>> # audio file is decoded on the fly
     >>> inputs = processor(dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt")
-    >>> outputs = model(**inputs)
+    >>> with torch.no_grad():
+    ...     outputs = model(**inputs)

     >>> last_hidden_states = outputs.last_hidden_state
+    >>> list(last_hidden_states.shape)
+    {expected_output}
     ```
     """
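Rendered through `.format(**doc_kwargs)`, the tail added to this sample turns the free-floating example into a checkable doctest. A minimal sketch of the rendering, assuming the surrounding constant is `PT_SPEECH_BASE_MODEL_SAMPLE` and using illustrative wav2vec2 values (neither the checkpoint nor the shape is pinned by this diff):

```python
# Illustrative rendering of the template above (hypothetical values).
doc = PT_SPEECH_BASE_MODEL_SAMPLE.format(
    processor_class="Wav2Vec2Processor",
    model_class="Wav2Vec2Model",
    checkpoint="facebook/wav2vec2-base-960h",
    expected_output="[1, 292, 768]",
)
# The rendered sample now ends with a line the doctest runner can check:
#     >>> list(last_hidden_states.shape)
#     [1, 292, 768]
```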
@@ -1152,24 +1157,32 @@ def _prepare_output_docstrings(output_type, config_class, min_indent=None):
     >>> import torch

     >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
+    >>> dataset = dataset.sort("id")
     >>> sampling_rate = dataset.features["audio"].sampling_rate

     >>> processor = {processor_class}.from_pretrained("{checkpoint}")
     >>> model = {model_class}.from_pretrained("{checkpoint}")

     >>> # audio file is decoded on the fly
     >>> inputs = processor(dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt")
-    >>> logits = model(**inputs).logits
+    >>> with torch.no_grad():
+    ...     logits = model(**inputs).logits
     >>> predicted_ids = torch.argmax(logits, dim=-1)

     >>> # transcribe speech
     >>> transcription = processor.batch_decode(predicted_ids)
+    >>> transcription[0]
+    {expected_output}
+    ```

-    >>> # compute loss
+    ```python
     >>> with processor.as_target_processor():
     ...     inputs["labels"] = processor(dataset[0]["text"], return_tensors="pt").input_ids

+    >>> # compute loss
     >>> loss = model(**inputs).loss
+    >>> round(loss.item(), 2)
+    {expected_loss}
     ```
     """
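The `round(loss.item(), 2)` pattern is what keeps the loss check doctest-friendly: doctest compares printed output verbatim, so pinning a full float repr would be brittle across hardware, while two decimals still catches regressions. With an illustrative value substituted for `{expected_loss}`, the tail of the second block renders as:

```python
>>> round(loss.item(), 2)
53.48
```

Splitting the sample into two fenced blocks also lets the transcription check and the loss check run as separate doctests.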
@@ -1182,21 +1195,31 @@ def _prepare_output_docstrings(output_type, config_class, min_indent=None):
     >>> import torch

     >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
+    >>> dataset = dataset.sort("id")
     >>> sampling_rate = dataset.features["audio"].sampling_rate

     >>> feature_extractor = {processor_class}.from_pretrained("{checkpoint}")
     >>> model = {model_class}.from_pretrained("{checkpoint}")

     >>> # audio file is decoded on the fly
-    >>> inputs = feature_extractor(dataset[0]["audio"]["array"], return_tensors="pt")
-    >>> logits = model(**inputs).logits
-    >>> predicted_class_ids = torch.argmax(logits, dim=-1)
+    >>> inputs = feature_extractor(dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt")
+
+    >>> with torch.no_grad():
+    ...     logits = model(**inputs).logits
+
+    >>> predicted_class_ids = torch.argmax(logits, dim=-1).item()
     >>> predicted_label = model.config.id2label[predicted_class_ids]
+    >>> predicted_label
+    {expected_output}
+    ```

+    ```python
     >>> # compute loss - target_label is e.g. "down"
     >>> target_label = model.config.id2label[0]
     >>> inputs["labels"] = torch.tensor([model.config.label2id[target_label]])
     >>> loss = model(**inputs).loss
+    >>> round(loss.item(), 2)
+    {expected_loss}
     ```
     """
@@ -1210,17 +1233,22 @@ def _prepare_output_docstrings(output_type, config_class, min_indent=None):
     >>> import torch

     >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
+    >>> dataset = dataset.sort("id")
     >>> sampling_rate = dataset.features["audio"].sampling_rate

     >>> feature_extractor = {processor_class}.from_pretrained("{checkpoint}")
     >>> model = {model_class}.from_pretrained("{checkpoint}")

     >>> # audio file is decoded on the fly
-    >>> inputs = feature_extractor(dataset[0]["audio"]["array"], return_tensors="pt")
-    >>> logits = model(**inputs).logits
+    >>> inputs = feature_extractor(dataset[0]["audio"]["array"], return_tensors="pt", sampling_rate=sampling_rate)
+    >>> with torch.no_grad():
+    ...     logits = model(**inputs).logits
+
     >>> probabilities = torch.sigmoid(logits[0])
     >>> # labels is a one-hot array of shape (num_frames, num_speakers)
     >>> labels = (probabilities > 0.5).long()
+    >>> labels[0].tolist()
+    {expected_output}
     ```
     """
@@ -1234,14 +1262,19 @@ def _prepare_output_docstrings(output_type, config_class, min_indent=None):
     >>> import torch

     >>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
+    >>> dataset = dataset.sort("id")
     >>> sampling_rate = dataset.features["audio"].sampling_rate

     >>> feature_extractor = {processor_class}.from_pretrained("{checkpoint}")
     >>> model = {model_class}.from_pretrained("{checkpoint}")

     >>> # audio file is decoded on the fly
-    >>> inputs = feature_extractor(dataset[:2]["audio"]["array"], return_tensors="pt")
-    >>> embeddings = model(**inputs).embeddings
+    >>> inputs = feature_extractor(
+    ...     [d["array"] for d in dataset[:2]["audio"]], sampling_rate=sampling_rate, return_tensors="pt", padding=True
+    ... )
+    >>> with torch.no_grad():
+    ...     embeddings = model(**inputs).embeddings
+
     >>> embeddings = torch.nn.functional.normalize(embeddings, dim=-1).cpu()

     >>> # the resulting embeddings can be used for cosine similarity-based retrieval
@@ -1250,6 +1283,8 @@ def _prepare_output_docstrings(output_type, config_class, min_indent=None):
     >>> threshold = 0.7  # the optimal threshold is dataset-dependent
     >>> if similarity < threshold:
     ...     print("Speakers are not the same!")
+    >>> round(similarity.item(), 2)
+    {expected_output}
     ```
     """
@@ -1553,9 +1588,11 @@ def add_code_sample_docstrings(
     checkpoint=None,
     output_type=None,
     config_class=None,
-    mask=None,
+    mask="[MASK]",
     model_cls=None,
-    modality=None
+    modality=None,
+    expected_output="",
+    expected_loss="",
 ):
     def docstring_decorator(fn):
         # model_class defaults to function's class if not specified otherwise
@@ -1568,7 +1605,17 @@ def docstring_decorator(fn):
         else:
             sample_docstrings = PT_SAMPLE_DOCSTRINGS

-        doc_kwargs = dict(model_class=model_class, processor_class=processor_class, checkpoint=checkpoint)
+        # putting all kwargs for docstrings in a dict to be used
+        # with the `.format(**doc_kwargs)`. Note that string might
+        # be formatted with non-existing keys, which is fine.
+        doc_kwargs = dict(
+            model_class=model_class,
+            processor_class=processor_class,
+            checkpoint=checkpoint,
+            mask=mask,
+            expected_output=expected_output,
+            expected_loss=expected_loss,
+        )

         if "SequenceClassification" in model_class and modality == "audio":
             code_sample = sample_docstrings["AudioClassification"]
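The new comment's note that strings "might be formatted with non-existing keys" leans on a property of `str.format`: surplus keyword arguments are ignored, so the full `doc_kwargs` dict can be applied to every sample template whether or not a given template uses `{mask}`, `{expected_output}`, or `{expected_loss}`. A standalone illustration:

```python
# str.format ignores keyword arguments whose names never appear in the
# template; only placeholders present in the string must be supplied.
template = "checkpoint: {checkpoint}"
print(template.format(checkpoint="facebook/wav2vec2-base-960h", mask="[MASK]", expected_loss=""))
# -> checkpoint: facebook/wav2vec2-base-960h
```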
@@ -1581,7 +1628,6 @@ def docstring_decorator(fn):
         elif "MultipleChoice" in model_class:
             code_sample = sample_docstrings["MultipleChoice"]
         elif "MaskedLM" in model_class or model_class in ["FlaubertWithLMHeadModel", "XLMWithLMHeadModel"]:
-            doc_kwargs["mask"] = "[MASK]" if mask is None else mask
             code_sample = sample_docstrings["MaskedLM"]
         elif "LMHead" in model_class or "CausalLM" in model_class:
             code_sample = sample_docstrings["LMHead"]
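With the new keyword arguments in place, a modeling file opts into the expected-value placeholders roughly as below. This is a sketch under stated assumptions: the constant values and class names are illustrative, `output_type` is omitted for brevity, and only the keyword names themselves come from this diff.

```python
from transformers.file_utils import add_code_sample_docstrings

_CHECKPOINT_FOR_DOC = "facebook/wav2vec2-base-960h"  # illustrative checkpoint
_EXPECTED_OUTPUT_SHAPE = "[1, 292, 768]"  # illustrative expected doctest output

@add_code_sample_docstrings(
    processor_class="Wav2Vec2Processor",
    checkpoint=_CHECKPOINT_FOR_DOC,
    config_class="Wav2Vec2Config",
    model_cls="Wav2Vec2Model",
    modality="audio",
    expected_output=_EXPECTED_OUTPUT_SHAPE,
)
def forward(self, input_values=None):
    ...
```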