Commit 50bc57c

Update Perceiver code examples (huggingface#14783)
* Fix code examples
* Fix code example
1 parent 48d4827 commit 50bc57c

File tree

2 files changed: +122 -19 lines changed


docs/source/model_doc/perceiver.mdx

Lines changed: 5 additions & 7 deletions
@@ -81,9 +81,10 @@ Tips:
 - The quickest way to get started with the Perceiver is by checking the [tutorial
   notebooks](https://github.com/NielsRogge/Transformers-Tutorials/tree/master/Perceiver).
-- Note that the models available in the library only showcase some examples of what you can do with the Perceiver.
-  There are many more use cases, including question answering,
-  named-entity recognition, object detection, audio classification, video classification, etc.
+- Refer to the [blog post](https://huggingface.co/blog/perceiver) if you want to fully understand how the model works and
+  is implemented in the library. Note that the models available in the library only showcase some examples of what you can do
+  with the Perceiver. There are many more use cases, including question answering, named-entity recognition, object detection,
+  audio classification, video classification, etc.
 
 ## Perceiver specific outputs
 
@@ -102,10 +103,7 @@ named-entity recognition, object detection, audio classification, video classification, etc.
 ## PerceiverTokenizer
 
 [[autodoc]] PerceiverTokenizer
-    - build_inputs_with_special_tokens
-    - get_special_tokens_mask
-    - create_token_type_ids_from_sequences
-    - save_vocabulary
+    - __call__
 
 ## PerceiverFeatureExtractor
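The tokenizer's autodoc entry now documents only `__call__`. As a minimal usage sketch (assuming the `deepmind/language-perceiver` checkpoint used in the code examples below; PerceiverTokenizer encodes raw UTF-8 bytes, so there is no vocabulary file behind the methods that were dropped from the docs):

    from transformers import PerceiverTokenizer

    # Byte-level tokenizer: __call__ turns text into byte-level ids plus special tokens.
    tokenizer = PerceiverTokenizer.from_pretrained("deepmind/language-perceiver")
    encoding = tokenizer("hello world", return_tensors="pt")
    print(encoding.input_ids)       # tensor of byte-level token ids
    print(encoding.attention_mask)  # all ones, since nothing is padded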

src/transformers/models/perceiver/modeling_perceiver.py

Lines changed: 117 additions & 12 deletions
@@ -757,12 +757,7 @@ class PreTrainedModel
         self.encoder.layer[layer].attention.prune_heads(heads)
 
     @add_start_docstrings_to_model_forward(PERCEIVER_INPUTS_DOCSTRING.format("(batch_size, sequence_length)"))
-    @add_code_sample_docstrings(
-        processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
-        output_type=PerceiverModelOutput,
-        config_class=_CONFIG_FOR_DOC,
-    )
+    @replace_return_docstrings(output_type=PerceiverModelOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
         inputs,
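The swap from `add_code_sample_docstrings` to `replace_return_docstrings` drops the auto-generated code sample in favor of the handwritten `Examples::` block added in the next hunk; the decorator only fills in the `Returns:` section of the docstring. A rough sketch of the idea (not the library implementation):

    def sketch_replace_return_docstrings(output_type=None, config_class=None):
        # Expand the bare "Returns:" marker in fn.__doc__ into a description
        # of `output_type`, leaving any handwritten Examples block untouched.
        def decorator(fn):
            returns_doc = f"Returns:\n    {output_type.__name__} (see the docs of {config_class})"
            fn.__doc__ = fn.__doc__.replace("Returns:", returns_doc, 1)
            return fn
        return decorator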
@@ -773,6 +768,85 @@ def forward(
         output_hidden_states=None,
         return_dict=None,
     ):
+        r"""
+        Returns:
+
+        Examples::
+
+            >>> from transformers import PerceiverConfig, PerceiverTokenizer, PerceiverFeatureExtractor, PerceiverModel
+            >>> from transformers.models.perceiver.modeling_perceiver import PerceiverTextPreprocessor, PerceiverImagePreprocessor, PerceiverClassificationDecoder
+            >>> import torch
+            >>> import requests
+            >>> from PIL import Image
+
+            >>> # EXAMPLE 1: using the Perceiver to classify texts
+            >>> # - we define a TextPreprocessor, which can be used to embed tokens
+            >>> # - we define a ClassificationDecoder, which can be used to decode the
+            >>> # final hidden states of the latents to classification logits
+            >>> # using trainable position embeddings
+            >>> config = PerceiverConfig()
+            >>> preprocessor = PerceiverTextPreprocessor(config)
+            >>> decoder = PerceiverClassificationDecoder(config,
+            ...                                          num_channels=config.d_latents,
+            ...                                          trainable_position_encoding_kwargs=dict(num_channels=config.d_latents, index_dims=1),
+            ...                                          use_query_residual=True)
+            >>> model = PerceiverModel(config, input_preprocessor=preprocessor, decoder=decoder)
+
+            >>> # you can then do a forward pass as follows:
+            >>> tokenizer = PerceiverTokenizer()
+            >>> text = "hello world"
+            >>> inputs = tokenizer(text, return_tensors="pt").input_ids
+
+            >>> with torch.no_grad():
+            ...     outputs = model(inputs=inputs)
+            >>> logits = outputs.logits
+
+            >>> # to train, one can train the model using standard cross-entropy:
+            >>> criterion = torch.nn.CrossEntropyLoss()
+
+            >>> labels = torch.tensor([1])
+            >>> loss = criterion(logits, labels)
+
+            >>> # EXAMPLE 2: using the Perceiver to classify images
+            >>> # - we define an ImagePreprocessor, which can be used to embed images
+            >>> preprocessor = PerceiverImagePreprocessor(
+            ...     config,
+            ...     prep_type="conv1x1",
+            ...     spatial_downsample=1,
+            ...     out_channels=256,
+            ...     position_encoding_type="trainable",
+            ...     concat_or_add_pos="concat",
+            ...     project_pos_dim=256,
+            ...     trainable_position_encoding_kwargs=dict(num_channels=256, index_dims=config.image_size ** 2),
+            ... )
+
+            >>> model = PerceiverModel(
+            ...     config,
+            ...     input_preprocessor=preprocessor,
+            ...     decoder=PerceiverClassificationDecoder(
+            ...         config,
+            ...         num_channels=config.d_latents,
+            ...         trainable_position_encoding_kwargs=dict(num_channels=config.d_latents, index_dims=1),
+            ...         use_query_residual=True,
+            ...     ),
+            ... )
+
+            >>> # you can then do a forward pass as follows:
+            >>> feature_extractor = PerceiverFeatureExtractor()
+            >>> url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
+            >>> image = Image.open(requests.get(url, stream=True).raw)
+            >>> inputs = feature_extractor(image, return_tensors="pt").pixel_values
+
+            >>> with torch.no_grad():
+            ...     outputs = model(inputs=inputs)
+            >>> logits = outputs.logits
+
+            >>> # to train, one can train the model using standard cross-entropy:
+            >>> criterion = torch.nn.CrossEntropyLoss()
+
+            >>> labels = torch.tensor([1])
+            >>> loss = criterion(logits, labels)
+        """
         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
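As a follow-on to EXAMPLE 1 above (an illustration based on the default config values, not part of the diff): whatever the input modality, the encoder output lives in the latent space, so its shape is fixed by the config rather than by the input size:

    # With the default PerceiverConfig (num_latents=256, d_latents=1280, num_labels=2):
    print(outputs.last_hidden_state.shape)  # torch.Size([1, 256, 1280]), the final latents
    print(logits.shape)                     # torch.Size([1, 2]), the classification logits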
@@ -901,12 +975,7 @@ def __init__(self, config):
         self.post_init()
 
     @add_start_docstrings_to_model_forward(PERCEIVER_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
-    @add_code_sample_docstrings(
-        processor_class=_TOKENIZER_FOR_DOC,
-        checkpoint=_CHECKPOINT_FOR_DOC,
-        output_type=PerceiverMaskedLMOutput,
-        config_class=_CONFIG_FOR_DOC,
-    )
+    @replace_return_docstrings(output_type=PerceiverMaskedLMOutput, config_class=_CONFIG_FOR_DOC)
     def forward(
         self,
         inputs=None,
@@ -923,6 +992,42 @@ def forward(
             Labels for computing the masked language modeling loss. Indices should be in ``[-100, 0, ...,
             config.vocab_size]`` (see ``input_ids`` docstring) Tokens with indices set to ``-100`` are ignored
             (masked), the loss is only computed for the tokens with labels in ``[0, ..., config.vocab_size]``
+
+        Returns:
+
+        Examples::
+
+            >>> from transformers import PerceiverTokenizer, PerceiverForMaskedLM
+            >>> import torch
+
+            >>> tokenizer = PerceiverTokenizer.from_pretrained('deepmind/language-perceiver')
+            >>> model = PerceiverForMaskedLM.from_pretrained('deepmind/language-perceiver')
+
+            >>> # training
+            >>> text = "This is an incomplete sentence where some words are missing."
+            >>> inputs = tokenizer(text, padding="max_length", return_tensors="pt")
+            >>> # mask " missing."
+            >>> inputs['input_ids'][0, 52:61] = tokenizer.mask_token_id
+            >>> labels = tokenizer(text, padding="max_length", return_tensors="pt").input_ids
+
+            >>> outputs = model(**inputs, labels=labels)
+            >>> loss = outputs.loss
+            >>> logits = outputs.logits
+
+            >>> # inference
+            >>> text = "This is an incomplete sentence where some words are missing."
+            >>> encoding = tokenizer(text, padding="max_length", return_tensors="pt")
+
+            >>> # mask bytes corresponding to " missing.". Note that the model performs much better if the masked span starts with a space.
+            >>> encoding['input_ids'][0, 52:61] = tokenizer.mask_token_id
+
+            >>> # forward pass
+            >>> with torch.no_grad():
+            ...     outputs = model(**encoding)
+            >>> logits = outputs.logits
+
+            >>> masked_tokens_predictions = logits[0, 52:61].argmax(dim=-1).tolist()
+            >>> tokenizer.decode(masked_tokens_predictions)
+            ' missing.'
         """
         if inputs is not None and input_ids is not None:
             raise ValueError("You cannot use both `inputs` and `input_ids`")
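The hardcoded `52:61` span in the example above can be derived rather than counted by hand. A hypothetical helper (not part of the library), assuming the checkpoint prepends a single special token ([CLS]) so that, for pure-ASCII text, character offsets map to token offsets shifted by one:

    text = "This is an incomplete sentence where some words are missing."
    span = " missing."  # starting the masked span with a space helps the model
    start = text.index(span) + 1             # +1 for the leading [CLS] token
    end = start + len(span.encode("utf-8"))  # byte length of the masked span
    print(start, end)  # 52 61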
