Skip to content

Commit 7031b86

Browse files
author
zida
committed
update convert code
1 parent 0605619 commit 7031b86

File tree

4 files changed

+579
-0
lines changed

4 files changed

+579
-0
lines changed

.DS_Store

0 Bytes
Binary file not shown.

bert_read_step_to_step/.DS_Store

0 Bytes
Binary file not shown.
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
# coding=utf-8
2+
# Copyright 2018 The HuggingFace Inc. team.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
"""Convert BERT checkpoint."""
16+
17+
from __future__ import absolute_import
18+
from __future__ import division
19+
from __future__ import print_function
20+
21+
import re
22+
import argparse
23+
import tensorflow as tf
24+
import torch
25+
import numpy as np
26+
27+
from modeling import BertConfig, BertModel
28+
29+
# Command-line interface. Every option is a mandatory string-valued path and
# they are registered in the order they should appear in the usage text.
parser = argparse.ArgumentParser()

## Required parameters
for _option, _description in (
    ("--tf_checkpoint_path",
     "Path the TensorFlow checkpoint path."),
    ("--bert_config_file",
     "The config json file corresponding to the pre-trained BERT model. \n"
     "This specifies the model architecture."),
    ("--pytorch_dump_path",
     "Path to the output PyTorch model."),
):
    parser.add_argument(_option, default=None, type=str, required=True,
                        help=_description)
# Do not leave the loop temporaries behind as module attributes.
del _option, _description

# Parsed at import time: convert() reads this module-level namespace.
args = parser.parse_args()
50+
51+
# Scope prefix under which the encoder variables live in the TF checkpoint.
_BERT_SCOPE = 'bert/'

# A scope component that carries a numeric index, e.g. "layer_3".
_INDEXED_SCOPE = re.compile(r'([A-Za-z]+)_(\d+)')

# Optimizer slot variables found in fine-tuned checkpoints; they have no
# counterpart among the PyTorch model's parameters.
_OPTIMIZER_SCOPES = frozenset(['adam_m', 'adam_v'])


def _resolve_parameter(model, scopes):
    """Return the attribute of ``model`` addressed by TF scope components.

    Args:
        model: root object to start the attribute walk from.
        scopes: non-empty list of scope components (the checkpoint variable
            name split on ``/``, without the leading ``bert`` scope).

    Returns:
        The object reached by following each component with ``getattr``.
        A component of the form ``<letters>_<digits>`` is treated as attribute
        ``<letters>`` indexed by ``<digits>``; ``kernel`` is looked up as
        ``weight``; and when the last component ends in ``_embeddings`` the
        ``weight`` attribute of the reached object is returned.

    Raises:
        AttributeError: if a component has no matching attribute.
    """
    pointer = model
    for scope in scopes:
        indexed = _INDEXED_SCOPE.fullmatch(scope)
        attr = indexed.group(1) if indexed else scope
        # The checkpoint's "kernel" corresponds to the "weight" attribute here.
        pointer = getattr(pointer, 'weight' if attr == 'kernel' else attr)
        if indexed:
            pointer = pointer[int(indexed.group(2))]
    if scopes[-1].endswith('_embeddings'):
        # The walk ended on an embedding module; its tensor is ``.weight``.
        pointer = getattr(pointer, 'weight')
    return pointer


def convert():
    """Convert a TensorFlow BERT checkpoint into a PyTorch state dict file.

    Reads ``bert_config_file``, ``tf_checkpoint_path`` and
    ``pytorch_dump_path`` from the module-level ``args``. A ``BertModel`` is
    built from the config, every checkpoint variable under the ``bert/`` scope
    is copied onto the matching model attribute, and ``model.state_dict()`` is
    saved to ``pytorch_dump_path``.

    Variables outside the ``bert/`` scope (such as the ``cls/...`` heads) and
    optimizer slot variables (``adam_m`` / ``adam_v``) are skipped. Variables
    are loaded one at a time, so skipped ones are never read from disk.

    Raises:
        AssertionError: if a checkpoint array's shape differs from the shape
            of the target parameter; ``args`` holds both shapes. It is raised
            explicitly so the check still runs under ``python -O``.
        ValueError: if the checkpoint has no convertible ``bert/`` variable.
    """
    # Initialise PyTorch model
    config = BertConfig.from_json_file(args.bert_config_file)
    model = BertModel(config)

    # Load weights from TF model
    path = args.tf_checkpoint_path
    print("Converting TensorFlow checkpoint from {}".format(path))

    converted = 0
    for tf_name, shape in tf.train.list_variables(path):
        # Match the scope explicitly instead of chopping a fixed number of
        # characters, which mangles every name that is not under "bert/".
        if not tf_name.startswith(_BERT_SCOPE):
            print("Skipping {}".format(tf_name))
            continue
        scopes = tf_name[len(_BERT_SCOPE):].split('/')
        if _OPTIMIZER_SCOPES.intersection(scopes):
            print("Skipping {}".format(tf_name))
            continue

        print("Loading {} with shape {}".format(tf_name, shape))
        array = tf.train.load_variable(path, tf_name)
        print("Numpy array shape {}".format(array.shape))

        pointer = _resolve_parameter(model, scopes)
        if scopes[-1] == 'kernel':
            # Kernels are transposed before the shape check; presumably the
            # checkpoint stores them in the opposite axis order to the model.
            array = np.transpose(array)
        if pointer.shape != array.shape:
            raise AssertionError(pointer.shape, array.shape)
        pointer.data = torch.from_numpy(array)
        converted += 1

    if not converted:
        # Saving now would write an untouched, randomly initialised model.
        raise ValueError(
            "No variables under the '{}' scope were found in {}".format(
                _BERT_SCOPE, path))

    # Save pytorch-model
    torch.save(model.state_dict(), args.pytorch_dump_path)


if __name__ == "__main__":
    convert()

0 commit comments

Comments
 (0)