
Commit ee6674d

Fix doc examples: name '...' is not defined (huggingface#14687)
* Fix doc examples: name '...' is not defined

* Remove >>> and ... in some docstrings in visual_bert

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
1 parent e621932 commit ee6674d
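The failures this commit fixes come from how doctest-style example checkers treat docstrings: only lines carrying the `>>>` / `...` prompts are collected and executed, so prompt-prefixed lines that reference the intentionally undefined helper `get_visual_embeddings(image)` (or a typo such as `output` for `outputs`) fail with "name '...' is not defined", while unprompted code is rendered in the docs but never run. A minimal, self-contained sketch of that behaviour — the `documented` function and `undefined_helper` name are made up for illustration:

    import doctest

    def documented():
        """
        Example::

            >>> value = 1 + 1   # prompt-prefixed: executed by doctest
            >>> value
            2

            result = undefined_helper()   # no prompt: rendered in docs, never executed
        """

    # Only the two ">>>" lines above are attempted; the unprompted line is ignored.
    print(doctest.testmod(verbose=True))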

4 files changed: +130 -129 lines


src/transformers/models/longformer/modeling_longformer.py

Lines changed: 1 addition & 1 deletion
@@ -1764,7 +1764,7 @@ def forward(
  ... # check ``LongformerModel.forward`` for more details how to set `attention_mask`
  >>> outputs = model(input_ids, attention_mask=attention_mask, labels=input_ids)
  >>> loss = outputs.loss
- >>> prediction_logits = output.logits
+ >>> prediction_logits = outputs.logits
  """
  return_dict = return_dict if return_dict is not None else self.config.use_return_dict

src/transformers/models/megatron_bert/modeling_megatron_bert.py

Lines changed: 1 addition & 1 deletion
@@ -1189,7 +1189,7 @@ def forward(
  >>> import torch

  >>> tokenizer = BertTokenizer.from_pretrained('nvidia/megatron-bert-cased-345m')
- >>> model = MegatronBertLMHeadModel.from_pretrained('nvidia/megatron-bert-cased-345m', is_decoder=True)
+ >>> model = MegatronBertForCausalLM.from_pretrained('nvidia/megatron-bert-cased-345m', is_decoder=True)

  >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
  >>> outputs = model(**inputs)

src/transformers/models/visual_bert/modeling_visual_bert.py

Lines changed: 127 additions & 126 deletions
@@ -741,27 +741,27 @@ def forward(

  Example::

- >>> # Assumption: `get_visual_embeddings(image)` gets the visual embeddings of the image.
- >>> from transformers import BertTokenizer, VisualBertModel
- >>> import torch
+ # Assumption: `get_visual_embeddings(image)` gets the visual embeddings of the image.
+ from transformers import BertTokenizer, VisualBertModel
+ import torch

- >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
- >>> model = VisualBertModel.from_pretrained('uclanlp/visualbert-vqa-coco-pre')
+ tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+ model = VisualBertModel.from_pretrained('uclanlp/visualbert-vqa-coco-pre')

- >>> inputs = tokenizer("The capital of France is Paris.", return_tensors="pt")
- >>> visual_embeds = get_visual_embeddings(image).unsqueeze(0)
- >>> visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long)
- >>> visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.float)
+ inputs = tokenizer("The capital of France is Paris.", return_tensors="pt")
+ visual_embeds = get_visual_embeddings(image).unsqueeze(0)
+ visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long)
+ visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.float)

- >>> inputs.update({
- ... "visual_embeds": visual_embeds,
- ... "visual_token_type_ids": visual_token_type_ids,
- ... "visual_attention_mask": visual_attention_mask
- ... })
+ inputs.update({
+ "visual_embeds": visual_embeds,
+ "visual_token_type_ids": visual_token_type_ids,
+ "visual_attention_mask": visual_attention_mask
+ })

- >>> outputs = model(**inputs)
+ outputs = model(**inputs)

- >>> last_hidden_states = outputs.last_hidden_state
+ last_hidden_states = outputs.last_hidden_state
  """

  output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
@@ -923,31 +923,31 @@ def forward(

  Example::

- >>> # Assumption: `get_visual_embeddings(image)` gets the visual embeddings of the image in the batch.
- >>> from transformers import BertTokenizer, VisualBertForPreTraining
+ # Assumption: `get_visual_embeddings(image)` gets the visual embeddings of the image in the batch.
+ from transformers import BertTokenizer, VisualBertForPreTraining

- >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
- >>> model = VisualBertForPreTraining.from_pretrained('uclanlp/visualbert-vqa-coco-pre')
+ tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+ model = VisualBertForPreTraining.from_pretrained('uclanlp/visualbert-vqa-coco-pre')

- >>> inputs = tokenizer("The capital of France is {mask}.", return_tensors="pt")
- >>> visual_embeds = get_visual_embeddings(image).unsqueeze(0)
- >>> visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long)
- >>> visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.float)
+ inputs = tokenizer("The capital of France is {mask}.", return_tensors="pt")
+ visual_embeds = get_visual_embeddings(image).unsqueeze(0)
+ visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long)
+ visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.float)

- >>> inputs.update({
- ... "visual_embeds": visual_embeds,
- ... "visual_token_type_ids": visual_token_type_ids,
- ... "visual_attention_mask": visual_attention_mask
- ... })
- >>> max_length = inputs["input_ids"].shape[-1]+visual_embeds.shape[-2]
- >>> labels = tokenizer("The capital of France is Paris.", return_tensors="pt", padding="max_length", max_length=max_length)["input_ids"]
- >>> sentence_image_labels = torch.tensor(1).unsqueeze(0) # Batch_size
+ inputs.update({
+ "visual_embeds": visual_embeds,
+ "visual_token_type_ids": visual_token_type_ids,
+ "visual_attention_mask": visual_attention_mask
+ })
+ max_length = inputs["input_ids"].shape[-1]+visual_embeds.shape[-2]
+ labels = tokenizer("The capital of France is Paris.", return_tensors="pt", padding="max_length", max_length=max_length)["input_ids"]
+ sentence_image_labels = torch.tensor(1).unsqueeze(0) # Batch_size


- >>> outputs = model(**inputs, labels=labels, sentence_image_labels=sentence_image_labels)
- >>> loss = outputs.loss
- >>> prediction_logits = outputs.prediction_logits
- >>> seq_relationship_logits = outputs.seq_relationship_logits
+ outputs = model(**inputs, labels=labels, sentence_image_labels=sentence_image_labels)
+ loss = outputs.loss
+ prediction_logits = outputs.prediction_logits
+ seq_relationship_logits = outputs.seq_relationship_logits
  """
  return_dict = return_dict if return_dict is not None else self.config.use_return_dict

@@ -1057,37 +1057,38 @@ def forward(

  Example::

- >>> from transformers import BertTokenizer, VisualBertForMultipleChoice
- >>> import torch
-
- >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
- >>> model = VisualBertForMultipleChoice.from_pretrained('uclanlp/visualbert-vcr')
-
- >>> prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
- >>> choice0 = "It is eaten with a fork and a knife."
- >>> choice1 = "It is eaten while held in the hand."
-
- >>> visual_embeds = get_visual_embeddings(image)
- >>> # (batch_size, num_choices, visual_seq_length, visual_embedding_dim)
- >>> visual_embeds = visual_embeds.expand(1, 2, *visual_embeds.shape)
- >>> visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long)
- >>> visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.float)
-
- >>> labels = torch.tensor(0).unsqueeze(0) # choice0 is correct (according to Wikipedia ;)), batch size 1
-
- >>> encoding = tokenizer([[prompt, prompt], [choice0, choice1]], return_tensors='pt', padding=True)
- >>> # batch size is 1
- >>> inputs_dict = {k: v.unsqueeze(0) for k,v in encoding.items()}
- >>> inputs_dict.update({
- ... "visual_embeds": visual_embeds,
- ... "visual_attention_mask": visual_attention_mask,
- ... "visual_token_type_ids": visual_token_type_ids,
- ... "labels": labels
- ... })
- >>> outputs = model(**inputs_dict)
-
- >>> loss = outputs.loss
- >>> logits = outputs.logits
+ # Assumption: `get_visual_embeddings(image)` gets the visual embeddings of the image in the batch.
+ from transformers import BertTokenizer, VisualBertForMultipleChoice
+ import torch
+
+ tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+ model = VisualBertForMultipleChoice.from_pretrained('uclanlp/visualbert-vcr')
+
+ prompt = "In Italy, pizza served in formal settings, such as at a restaurant, is presented unsliced."
+ choice0 = "It is eaten with a fork and a knife."
+ choice1 = "It is eaten while held in the hand."
+
+ visual_embeds = get_visual_embeddings(image)
+ # (batch_size, num_choices, visual_seq_length, visual_embedding_dim)
+ visual_embeds = visual_embeds.expand(1, 2, *visual_embeds.shape)
+ visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long)
+ visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.float)
+
+ labels = torch.tensor(0).unsqueeze(0) # choice0 is correct (according to Wikipedia ;)), batch size 1
+
+ encoding = tokenizer([[prompt, prompt], [choice0, choice1]], return_tensors='pt', padding=True)
+ # batch size is 1
+ inputs_dict = {k: v.unsqueeze(0) for k,v in encoding.items()}
+ inputs_dict.update({
+ "visual_embeds": visual_embeds,
+ "visual_attention_mask": visual_attention_mask,
+ "visual_token_type_ids": visual_token_type_ids,
+ "labels": labels
+ })
+ outputs = model(**inputs_dict)
+
+ loss = outputs.loss
+ logits = outputs.logits
  """
  return_dict = return_dict if return_dict is not None else self.config.use_return_dict
  num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1]
@@ -1204,30 +1205,30 @@ def forward(

  Example::

- >>> # Assumption: `get_visual_embeddings(image)` gets the visual embeddings of the image in the batch.
- >>> from transformers import BertTokenizer, VisualBertForQuestionAnswering
- >>> import torch
+ # Assumption: `get_visual_embeddings(image)` gets the visual embeddings of the image in the batch.
+ from transformers import BertTokenizer, VisualBertForQuestionAnswering
+ import torch

- >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
- >>> model = VisualBertForQuestionAnswering.from_pretrained('uclanlp/visualbert-vqa')
+ tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+ model = VisualBertForQuestionAnswering.from_pretrained('uclanlp/visualbert-vqa')

- >>> text = "Who is eating the apple?"
- >>> inputs = tokenizer(text, return_tensors='pt')
- >>> visual_embeds = get_visual_embeddings(image).unsqueeze(0)
- >>> visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long)
- >>> visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.float)
+ text = "Who is eating the apple?"
+ inputs = tokenizer(text, return_tensors='pt')
+ visual_embeds = get_visual_embeddings(image).unsqueeze(0)
+ visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long)
+ visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.float)

- >>> inputs.update({
- ... "visual_embeds": visual_embeds,
- ... "visual_token_type_ids": visual_token_type_ids,
- ... "visual_attention_mask": visual_attention_mask
- ... })
+ inputs.update({
+ "visual_embeds": visual_embeds,
+ "visual_token_type_ids": visual_token_type_ids,
+ "visual_attention_mask": visual_attention_mask
+ })

- >>> labels = torch.tensor([[0.0,1.0]]).unsqueeze(0) # Batch size 1, Num labels 2
+ labels = torch.tensor([[0.0,1.0]]).unsqueeze(0) # Batch size 1, Num labels 2

- >>> outputs = model(**inputs, labels=labels)
- >>> loss = outputs.loss
- >>> scores = outputs.logits
+ outputs = model(**inputs, labels=labels)
+ loss = outputs.loss
+ scores = outputs.logits
  """
  return_dict = return_dict if return_dict is not None else self.config.use_return_dict

@@ -1327,30 +1328,30 @@ def forward(

  Example::

- >>> # Assumption: `get_visual_embeddings(image)` gets the visual embeddings of the image in the batch.
- >>> from transformers import BertTokenizer, VisualBertForVisualReasoning
- >>> import torch
+ # Assumption: `get_visual_embeddings(image)` gets the visual embeddings of the image in the batch.
+ from transformers import BertTokenizer, VisualBertForVisualReasoning
+ import torch

- >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
- >>> model = VisualBertForVisualReasoning.from_pretrained('uclanlp/visualbert-nlvr2')
+ tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+ model = VisualBertForVisualReasoning.from_pretrained('uclanlp/visualbert-nlvr2')

- >>> text = "Who is eating the apple?"
- >>> inputs = tokenizer(text, return_tensors='pt')
- >>> visual_embeds = get_visual_embeddings(image).unsqueeze(0)
- >>> visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long)
- >>> visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.float)
+ text = "Who is eating the apple?"
+ inputs = tokenizer(text, return_tensors='pt')
+ visual_embeds = get_visual_embeddings(image).unsqueeze(0)
+ visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long)
+ visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.float)

- >>> inputs.update({
- ... "visual_embeds": visual_embeds,
- ... "visual_token_type_ids": visual_token_type_ids,
- ... "visual_attention_mask": visual_attention_mask
- ... })
+ inputs.update({
+ "visual_embeds": visual_embeds,
+ "visual_token_type_ids": visual_token_type_ids,
+ "visual_attention_mask": visual_attention_mask
+ })

- >>> labels = torch.tensor(1).unsqueeze(0) # Batch size 1, Num choices 2
+ labels = torch.tensor(1).unsqueeze(0) # Batch size 1, Num choices 2

- >>> outputs = model(**inputs, labels=labels)
- >>> loss = outputs.loss
- >>> scores = outputs.logits
+ outputs = model(**inputs, labels=labels)
+ loss = outputs.loss
+ scores = outputs.logits
  """
  return_dict = return_dict if return_dict is not None else self.config.use_return_dict

@@ -1488,32 +1489,32 @@ def forward(

  Example::

- >>> # Assumption: `get_visual_embeddings(image)` gets the visual embeddings of the image in the batch.
- >>> from transformers import BertTokenizer, VisualBertForRegionToPhraseAlignment
- >>> import torch
+ # Assumption: `get_visual_embeddings(image)` gets the visual embeddings of the image in the batch.
+ from transformers import BertTokenizer, VisualBertForRegionToPhraseAlignment
+ import torch

- >>> tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
- >>> model = VisualBertForRegionToPhraseAlignment.from_pretrained('uclanlp/visualbert-vqa-coco-pre')
+ tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+ model = VisualBertForRegionToPhraseAlignment.from_pretrained('uclanlp/visualbert-vqa-coco-pre')

- >>> text = "Who is eating the apple?"
- >>> inputs = tokenizer(text, return_tensors='pt')
- >>> visual_embeds = get_visual_embeddings(image).unsqueeze(0)
- >>> visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long)
- >>> visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.float)
- >>> region_to_phrase_position = torch.ones((1, inputs["input_ids"].shape[-1]+visual_embeds.shape[-2]))
+ text = "Who is eating the apple?"
+ inputs = tokenizer(text, return_tensors='pt')
+ visual_embeds = get_visual_embeddings(image).unsqueeze(0)
+ visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long)
+ visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.float)
+ region_to_phrase_position = torch.ones((1, inputs["input_ids"].shape[-1]+visual_embeds.shape[-2]))

- >>> inputs.update({
- ... "region_to_phrase_position": region_to_phrase_position,
- ... "visual_embeds": visual_embeds,
- ... "visual_token_type_ids": visual_token_type_ids,
- ... "visual_attention_mask": visual_attention_mask
- ... })
+ inputs.update({
+ "region_to_phrase_position": region_to_phrase_position,
+ "visual_embeds": visual_embeds,
+ "visual_token_type_ids": visual_token_type_ids,
+ "visual_attention_mask": visual_attention_mask
+ })

- >>> labels = torch.ones((1, inputs["input_ids"].shape[-1]+visual_embeds.shape[-2], visual_embeds.shape[-2])) # Batch size 1
+ labels = torch.ones((1, inputs["input_ids"].shape[-1]+visual_embeds.shape[-2], visual_embeds.shape[-2])) # Batch size 1

- >>> outputs = model(**inputs, labels=labels)
- >>> loss = outputs.loss
- >>> scores = outputs.logits
+ outputs = model(**inputs, labels=labels)
+ loss = outputs.loss
+ scores = outputs.logits
  """
  if region_to_phrase_position is None:
      raise ValueError("`region_to_phrase_position` should not be None when using Flickr Model.")
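The VisualBert examples above intentionally leave `get_visual_embeddings(image)` undefined, which is why the prompts had to go. To actually execute one of them, a hypothetical stand-in could look like the sketch below; the name matches the docstrings, but the region count and feature size are assumptions and should match the checkpoint's `visual_embedding_dim`:

    import torch

    # Hypothetical stub for the `get_visual_embeddings(image)` helper assumed by the
    # VisualBert docstrings. A real pipeline would run a frozen object detector
    # (e.g. Faster R-CNN) and return pooled region features per image.
    def get_visual_embeddings(image, num_regions=36, visual_embedding_dim=2048):
        # Random features standing in for detector output, shaped
        # (num_regions, visual_embedding_dim); take the dim from
        # model.config.visual_embedding_dim for the checkpoint you load.
        return torch.randn(num_regions, visual_embedding_dim)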

src/transformers/models/wav2vec2/modeling_wav2vec2.py

Lines changed: 1 addition & 1 deletion
@@ -1517,7 +1517,7 @@ def forward(

  Example::

- >>> from transformers import Wav2Vec2Processor, Wav2Vec2Model
+ >>> from transformers import Wav2Vec2Processor, Wav2Vec2ForMaskedLM
  >>> from datasets import load_dataset
  >>> import soundfile as sf
