
Commit c30eff6 (1 parent: 17a9081)

Use @Narsil's suggestion to forward the model's configuration to the ONNXConfig to avoid interpolation.

25 files changed: +769 / −673 lines
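Only six of the 25 changed files are shown below. The `OnnxConfig` base class that makes the pattern work is among the files not shown; here is a minimal sketch of the idea, assuming the constructor does nothing more than store the forwarded `PretrainedConfig` on `self._config` for subclass properties to read:

    # Minimal sketch only -- the real base class lives in src/transformers/onnx
    # and is not part of this excerpt. Assumption: the constructor stores the
    # forwarded model config on `self._config`.
    from abc import ABC, abstractmethod
    from typing import Any, Mapping, Optional


    class OnnxConfig(ABC):
        def __init__(self, config):
            # Keep a handle on the model's PretrainedConfig so properties can
            # read concrete values (e.g. num_attention_heads) instead of
            # returning "$config.xxx" placeholder strings to interpolate later.
            self._config = config

        @property
        @abstractmethod
        def inputs(self) -> Mapping[str, Mapping[int, str]]:
            raise NotImplementedError()

        @property
        @abstractmethod
        def outputs(self) -> Mapping[str, Mapping[int, str]]:
            raise NotImplementedError()

        @property
        def optimizer_additional_args(self) -> Optional[Mapping[str, Any]]:
            return None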

src/transformers/models/albert/__init__.py (2 additions & 2 deletions)

@@ -28,7 +28,7 @@
 
 
 _import_structure = {
-    "configuration_albert": ["ALBERT_ONNX_CONFIG", "ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP", "AlbertConfig"],
+    "configuration_albert": ["ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP", "AlbertConfig", "AlbertOnnxConfig"],
 }
 
 if is_sentencepiece_available():
@@ -67,7 +67,7 @@
 
 
 if TYPE_CHECKING:
-    from .configuration_albert import ALBERT_ONNX_CONFIG, ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, AlbertConfig
+    from .configuration_albert import ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, AlbertConfig, AlbertOnnxConfig
 
     if is_sentencepiece_available():
         from .tokenization_albert import AlbertTokenizer
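Each `__init__.py` in this commit changes in two places because the exported names are declared twice: `_import_structure` drives lazy loading at runtime, while the `TYPE_CHECKING` branch gives static type checkers real imports. A rough sketch of how such a lazy module resolves an attribute (illustrative only; the actual helper in transformers has more machinery):

    import importlib
    from types import ModuleType


    # Illustrative sketch of the lazy-import pattern used by these __init__
    # files: attribute access triggers the submodule import, so renaming an
    # export (ALBERT_ONNX_CONFIG -> AlbertOnnxConfig) must be mirrored in
    # _import_structure.
    class LazyModule(ModuleType):
        def __init__(self, name: str, import_structure: dict):
            super().__init__(name)
            # Reverse map: exported name -> submodule that defines it.
            self._name_to_module = {
                attr: mod
                for mod, attrs in import_structure.items()
                for attr in attrs
            }

        def __getattr__(self, attr: str):
            if attr not in self._name_to_module:
                raise AttributeError(f"module {self.__name__} has no attribute {attr}")
            submodule = importlib.import_module(
                "." + self._name_to_module[attr], self.__name__
            )
            value = getattr(submodule, attr)
            setattr(self, attr, value)  # cache for subsequent lookups
            return value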

src/transformers/models/albert/configuration_albert.py (35 additions & 28 deletions)

@@ -14,10 +14,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ ALBERT model configuration """
+from typing import Mapping, Optional, Any
 
 from ...configuration_utils import PretrainedConfig
-from ...onnx import OnnxConfig, OnnxVariable
-
+from ...onnx import OnnxConfig, DEFAULT_BERT_OPTIMIZER_FEATURES
 
 ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
     "albert-base-v1": "https://huggingface.co/albert-base-v1/resolve/main/config.json",
@@ -154,29 +154,36 @@ def __init__(
         self.position_embedding_type = position_embedding_type
 
 
-ALBERT_ONNX_CONFIG = OnnxConfig(
-    inputs=[
-        OnnxVariable("input_ids", {0: "batch", 1: "sequence"}, repeated=1, value=None),
-        OnnxVariable("attention_mask", {0: "batch", 1: "sequence"}, repeated=1, value=None),
-        OnnxVariable("token_type_ids", {0: "batch", 1: "sequence"}, repeated=1, value=None),
-    ],
-    outputs=[
-        OnnxVariable("last_hidden_state", {0: "batch", 1: "sequence"}, repeated=1, value=None),
-        OnnxVariable("pooler_output", {0: "batch"}, repeated=1, value=None),
-    ],
-    runtime_config_overrides=None,
-    use_external_data_format=False,
-    minimum_required_onnx_opset=12,
-    optimizer="bert",
-    optimizer_features={
-        "enable_gelu": True,
-        "enable_layer_norm": True,
-        "enable_attention": True,
-        "enable_skip_layer_norm": True,
-        "enable_embed_layer_norm": True,
-        "enable_bias_skip_layer_norm": True,
-        "enable_bias_gelu": True,
-        "enable_gelu_approximation": False,
-    },
-    optimizer_additional_args={"num_heads": "$config.num_attention_heads", "hidden_size": "$config.hidden_size"},
-)
+# Copied from transformers.models.bert.configuration_bert.BertOnnxConfig with Bert->Albert
+class AlbertOnnxConfig(OnnxConfig):
+
+    @property
+    def inputs(self) -> Mapping[str, Mapping[int, str]]:
+        return {
+            "input_ids": {0: "batch", 1: "sequence"},
+            "attention_mask": {0: "batch", 1: "sequence"},
+            "token_type_ids": {0: "batch", 1: "sequence"},
+        }
+
+    @property
+    def outputs(self) -> Mapping[str, Mapping[int, str]]:
+        return {
+            "last_hidden_state": {0: "batch", 1: "sequence"},
+            "pooler_output": {0: "batch"}
+        }
+
+    @property
+    def optimizer(self) -> Optional[str]:
+        return "bert"
+
+    @property
+    def optimizer_features(self) -> Optional[Mapping[str, bool]]:
+        return DEFAULT_BERT_OPTIMIZER_FEATURES
+
+    @property
+    def optimizer_additional_args(self) -> Optional[Mapping[str, Any]]:
+        return {
+            "num_heads": self._config.num_attention_heads,
+            "hidden_size": self._config.hidden_size
+        }
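The payoff shows in `optimizer_additional_args`: the deleted constant carried placeholder strings such as "$config.num_attention_heads" that had to be interpolated later, while the class reads concrete values from the forwarded config. A hypothetical usage sketch (assuming the constructor takes the model config, as in the base-class sketch above):

    from transformers import AlbertConfig
    from transformers.models.albert.configuration_albert import AlbertOnnxConfig

    config = AlbertConfig()
    onnx_config = AlbertOnnxConfig(config)  # assumed constructor signature

    # Real integers come back -- no "$config.*" strings left to interpolate.
    assert onnx_config.optimizer_additional_args == {
        "num_heads": config.num_attention_heads,
        "hidden_size": config.hidden_size,
    }
    assert set(onnx_config.inputs) == {"input_ids", "attention_mask", "token_type_ids"}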

src/transformers/models/bart/__init__.py (2 additions & 4 deletions)

@@ -28,10 +28,9 @@
 
 _import_structure = {
     "configuration_bart": [
-        "BART_ONNX_CONFIG",
-        "BART_ONNX_CONFIG_WITH_PAST",
         "BART_PRETRAINED_CONFIG_ARCHIVE_MAP",
         "BartConfig",
+        "BartOnnxConfig"
     ],
     "tokenization_bart": ["BartTokenizer"],
 }
@@ -65,10 +64,9 @@
 
 if TYPE_CHECKING:
     from .configuration_bart import (
-        BART_ONNX_CONFIG,
-        BART_ONNX_CONFIG_WITH_PAST,
         BART_PRETRAINED_CONFIG_ARCHIVE_MAP,
         BartConfig,
+        BartOnnxConfig
     )
     from .tokenization_bart import BartTokenizer
 

src/transformers/models/bart/configuration_bart.py (39 additions & 35 deletions)

@@ -14,9 +14,10 @@
 # limitations under the License.
 """ BART model configuration """
 import warnings
+from typing import Mapping, Optional, Any
 
 from ...configuration_utils import PretrainedConfig
-from ...onnx import OnnxConfig, OnnxVariable
+from ...onnx import OnnxConfigWithPast, DEFAULT_BERT_OPTIMIZER_FEATURES
 from ...utils import logging
 
 
@@ -189,37 +190,40 @@ def hidden_size(self) -> int:
         return self.d_model
 
 
-BART_ONNX_CONFIG = OnnxConfig(
-    inputs=[
-        OnnxVariable("input_ids", {0: "batch", 1: "sequence"}, repeated=1, value=None),
-        OnnxVariable("attention_mask", {0: "batch", 1: "sequence"}, repeated=1, value=None),
-    ],
-    outputs=[
-        OnnxVariable("last_hidden_state", {0: "batch", 1: "sequence"}, repeated=1, value=None),
-        OnnxVariable("encoder_last_hidden_state", {0: "batch", 1: "sequence"}, repeated=1, value=None),
-    ],
-    runtime_config_overrides={"use_cache": False},
-    use_external_data_format=False,
-    minimum_required_onnx_opset=11,
-    optimizer="bert",
-    optimizer_features=None,
-    optimizer_additional_args={"num_heads": "$config.decoder_attention_heads", "hidden_size": "$config.d_model"},
-)
-
-BART_ONNX_CONFIG_WITH_PAST = OnnxConfig(
-    inputs=[
-        OnnxVariable("input_ids", {0: "batch", 1: "sequence"}, repeated=1, value=None),
-        OnnxVariable("attention_mask", {0: "batch", 1: "sequence"}, repeated=1, value=None),
-    ],
-    outputs=[
-        OnnxVariable("last_hidden_state", {0: "batch", 1: "sequence"}, repeated=1, value=None),
-        OnnxVariable("past_keys", {0: "batch", 2: "sequence"}, repeated="$config.decoder_layers * 4", value=None),
-        OnnxVariable("encoder_last_hidden_state", {0: "batch", 1: "sequence"}, repeated=1, value=None),
-    ],
-    runtime_config_overrides={"use_cache": True},
-    use_external_data_format=False,
-    minimum_required_onnx_opset=11,
-    optimizer="bert",
-    optimizer_features=None,
-    optimizer_additional_args={"num_heads": "$config.decoder_attention_heads", "hidden_size": "$config.d_model"},
-)
+class BartOnnxConfig(OnnxConfigWithPast):
+
+    @property
+    def inputs(self) -> Mapping[str, Mapping[int, str]]:
+        return {
+            "input_ids": {0: "batch", 1: "sequence"},
+            "attention_mask": {0: "batch", 1: "sequence"},
+        }
+
+    @property
+    def outputs(self) -> Mapping[str, Mapping[int, str]]:
+        if self.use_past:
+            return {
+                "last_hidden_state": {0: "batch", 1: "sequence"},
+                "past_keys": {0: "batch", 2: "sequence"},
+                "encoder_last_hidden_state": {0: "batch", 1: "sequence"},
+            }
+        else:
+            return {
+                "last_hidden_state": {0: "batch", 1: "sequence"},
+                "encoder_last_hidden_state": {0: "batch", 1: "sequence"},
+            }
+
+    @property
+    def optimizer(self) -> Optional[str]:
+        return "bert"
+
+    @property
+    def optimizer_features(self) -> Optional[Mapping[str, bool]]:
+        return DEFAULT_BERT_OPTIMIZER_FEATURES
+
+    @property
+    def optimizer_additional_args(self) -> Optional[Mapping[str, Any]]:
+        return {
+            "num_heads": self._config.decoder_attention_heads,
+            "hidden_size": self._config.d_model
+        }
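`BartOnnxConfig` extends `OnnxConfigWithPast`, collapsing the old `BART_ONNX_CONFIG` / `BART_ONNX_CONFIG_WITH_PAST` pair into a single class whose `outputs` branch on a `use_past` flag. A runnable toy sketch of that mechanism (how the flag is actually plumbed through the real base class is an assumption):

    from typing import Mapping


    # Toy stand-in for OnnxConfigWithPast; assumption: `use_past` is set at
    # construction time and stored on the instance for `outputs` to consult.
    class OnnxConfigWithPast:
        def __init__(self, config, use_past: bool = False):
            self._config = config
            self.use_past = use_past


    class BartLikeOnnxConfig(OnnxConfigWithPast):
        @property
        def outputs(self) -> Mapping[str, Mapping[int, str]]:
            outputs = {
                "last_hidden_state": {0: "batch", 1: "sequence"},
                "encoder_last_hidden_state": {0: "batch", 1: "sequence"},
            }
            if self.use_past:
                # The dynamic sequence axis of past_keys sits at position 2.
                outputs["past_keys"] = {0: "batch", 2: "sequence"}
            return outputs


    plain = BartLikeOnnxConfig(config=None)
    with_past = BartLikeOnnxConfig(config=None, use_past=True)
    assert "past_keys" not in plain.outputs
    assert "past_keys" in with_past.outputs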

src/transformers/models/bert/__init__.py (2 additions & 2 deletions)

@@ -28,7 +28,7 @@
 
 
 _import_structure = {
-    "configuration_bert": ["BERT_ONNX_CONFIG", "BERT_PRETRAINED_CONFIG_ARCHIVE_MAP", "BertConfig"],
+    "configuration_bert": ["BERT_PRETRAINED_CONFIG_ARCHIVE_MAP", "BertConfig", "BertOnnxConfig"],
     "tokenization_bert": ["BasicTokenizer", "BertTokenizer", "WordpieceTokenizer"],
 }
 
@@ -83,7 +83,7 @@
 ]
 
 if TYPE_CHECKING:
-    from .configuration_bert import BERT_ONNX_CONFIG, BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, BertConfig
+    from .configuration_bert import BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, BertConfig, BertOnnxConfig
     from .tokenization_bert import BasicTokenizer, BertTokenizer, WordpieceTokenizer
 
 if is_tokenizers_available():

src/transformers/models/bert/configuration_bert.py (32 additions & 27 deletions)

@@ -14,9 +14,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ BERT model configuration """
+from typing import Mapping, Optional, Any
 
 from ...configuration_utils import PretrainedConfig
-from ...onnx.config import OnnxConfig, OnnxVariable
+from ...onnx import OnnxConfig, DEFAULT_BERT_OPTIMIZER_FEATURES
 from ...utils import logging
 
 
@@ -157,29 +158,33 @@ def __init__(
         self.use_cache = use_cache
 
 
-BERT_ONNX_CONFIG = OnnxConfig(
-    inputs=[
-        OnnxVariable("input_ids", {0: "batch", 1: "sequence"}, repeated=1, value=None),
-        OnnxVariable("attention_mask", {0: "batch", 1: "sequence"}, repeated=1, value=None),
-        OnnxVariable("token_type_ids", {0: "batch", 1: "sequence"}, repeated=1, value=None),
-    ],
-    outputs=[
-        OnnxVariable("last_hidden_state", {0: "batch", 1: "sequence"}, repeated=1, value=None),
-        OnnxVariable("pooler_output", {0: "batch"}, repeated=1, value=None),
-    ],
-    runtime_config_overrides=None,
-    use_external_data_format=False,
-    minimum_required_onnx_opset=12,
-    optimizer="bert",
-    optimizer_features={
-        "enable_gelu": True,
-        "enable_layer_norm": True,
-        "enable_attention": True,
-        "enable_skip_layer_norm": True,
-        "enable_embed_layer_norm": True,
-        "enable_bias_skip_layer_norm": True,
-        "enable_bias_gelu": True,
-        "enable_gelu_approximation": False,
-    },
-    optimizer_additional_args={"num_heads": "$config.num_attention_heads", "hidden_size": "$config.hidden_size"},
-)
+class BertOnnxConfig(OnnxConfig):
+    @property
+    def inputs(self) -> Mapping[str, Mapping[int, str]]:
+        return {
+            "input_ids": {0: "batch", 1: "sequence"},
+            "attention_mask": {0: "batch", 1: "sequence"},
+            "token_type_ids": {0: "batch", 1: "sequence"},
+        }
+
+    @property
+    def outputs(self) -> Mapping[str, Mapping[int, str]]:
+        return {
+            "last_hidden_state": {0: "batch", 1: "sequence"},
+            "pooler_output": {0: "batch"}
+        }
+
+    @property
+    def optimizer(self) -> Optional[str]:
+        return "bert"
+
+    @property
+    def optimizer_features(self) -> Optional[Mapping[str, bool]]:
+        return DEFAULT_BERT_OPTIMIZER_FEATURES
+
+    @property
+    def optimizer_additional_args(self) -> Optional[Mapping[str, Any]]:
+        return {
+            "num_heads": self._config.num_attention_heads,
+            "hidden_size": self._config.hidden_size
+        }
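The eight `enable_*` flags inlined in the old `BERT_ONNX_CONFIG` (and duplicated in the ALBERT one above) are now shared through `DEFAULT_BERT_OPTIMIZER_FEATURES`. The file defining that constant is not in this excerpt; presumably it captures the same dict, reconstructed here from the deleted lines:

    # Presumed contents of DEFAULT_BERT_OPTIMIZER_FEATURES, taken verbatim from
    # the optimizer_features dict deleted in this diff.
    DEFAULT_BERT_OPTIMIZER_FEATURES = {
        "enable_gelu": True,
        "enable_layer_norm": True,
        "enable_attention": True,
        "enable_skip_layer_norm": True,
        "enable_embed_layer_norm": True,
        "enable_bias_skip_layer_norm": True,
        "enable_bias_gelu": True,
        "enable_gelu_approximation": False,
    }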

src/transformers/models/distilbert/__init__.py (2 additions & 4 deletions)

@@ -23,10 +23,9 @@
 
 _import_structure = {
     "configuration_distilbert": [
-        "DISTILBERT_ONNX_CONFIG",
         "DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP",
-        "DISTILBERT_TOKEN_CLASSIFICATION_ONNX_CONFIG",
         "DistilBertConfig",
+        "DistilBertOnnxConfig"
     ],
     "tokenization_distilbert": ["DistilBertTokenizer"],
 }
@@ -62,10 +61,9 @@
 
 if TYPE_CHECKING:
     from .configuration_distilbert import (
-        DISTILBERT_ONNX_CONFIG,
         DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP,
-        DISTILBERT_TOKEN_CLASSIFICATION_ONNX_CONFIG,
        DistilBertConfig,
+        DistilBertOnnxConfig
     )
     from .tokenization_distilbert import DistilBertTokenizer
 
