
Commit b18b62c

test
1 parent 366797c commit b18b62c

10 files changed: +94 -9 lines

Binary file changed (32 Bytes); contents not shown.

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
from tensor_array.layers.attention.attention import MultiheadAttention

Lines changed: 33 additions & 0 deletions
@@ -0,0 +1,33 @@
from typing import Any
from .. import Layer
from ..util import Linear
from tensor_array.core import Tensor
from tensor_array.core import SoftMax  # assumed import location for the SoftMax op


def scaled_dot_product_attention(q, k, v, mask = None):
    # Attention scores: q @ k^T over the last two dimensions.
    # Note: the 1/sqrt(d_k) scaling factor is not applied here.
    attn_scores = q @ k.transpose(len(k.shape()) - 2, len(k.shape()) - 1)
    if mask is not None:
        # Additive mask (assumed convention): masked positions carry large negative values.
        attn_scores = attn_scores + mask
    attn_probs = SoftMax(attn_scores, len(attn_scores.shape()) - 1)
    return attn_probs @ v


class MultiheadAttention(Layer):
    def __init__(self, d_model, n_head) -> None:
        super().__init__()
        self.linear_q = Linear(d_model)
        self.linear_k = Linear(d_model)
        self.linear_v = Linear(d_model)
        self.linear_o = Linear(d_model)
        self.n_head = n_head

    def calculate(self, input_q, input_k, input_v, mask = None) -> Any:
        temp_q = self.linear_q(input_q)
        temp_k = self.linear_k(input_k)
        temp_v = self.linear_v(input_v)

        # Split the last dimension into (n_head, d_k) and move the head axis before
        # the sequence axis: (batch, seq, d_model) -> (batch, n_head, seq, d_k).
        temp_q = temp_q.reshape((temp_q.shape()[0], temp_q.shape()[1], self.n_head, temp_q.shape()[-1] // self.n_head)).transpose(1, 2)
        temp_k = temp_k.reshape((temp_k.shape()[0], temp_k.shape()[1], self.n_head, temp_k.shape()[-1] // self.n_head)).transpose(1, 2)
        temp_v = temp_v.reshape((temp_v.shape()[0], temp_v.shape()[1], self.n_head, temp_v.shape()[-1] // self.n_head)).transpose(1, 2)

        attention_output = scaled_dot_product_attention(temp_q, temp_k, temp_v, mask)

        # Merge the heads back: (batch, n_head, seq, d_k) -> (batch, seq, n_head * d_k).
        attention_output = attention_output.transpose(1, 2)
        attention_output = attention_output.reshape((attention_output.shape()[0], attention_output.shape()[1], attention_output.shape()[-2] * attention_output.shape()[-1]))
        return self.linear_o(attention_output)
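
A minimal usage sketch for the layer above. The sizes (d_model = 64, n_head = 8, batch 2, sequence length 10) are illustrative assumptions, and it is assumed that zeros accepts multi-dimensional shapes and that @ handles batched operands, as the code above already relies on:

from tensor_array.core import zeros, DataType
from tensor_array.layers.attention.attention import MultiheadAttention

attn = MultiheadAttention(d_model = 64, n_head = 8)
# (batch, seq_len, d_model) inputs; all-zero values purely for illustration.
q = zeros(shape = (2, 10, 64), dtype = DataType.FLOAT)
k = zeros(shape = (2, 10, 64), dtype = DataType.FLOAT)
v = zeros(shape = (2, 10, 64), dtype = DataType.FLOAT)
out = attn(q, k, v)    # first call also lazily initializes the Linear sub-layers
print(out.shape())     # expected: (2, 10, 64)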

Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
from collections import OrderedDict
from typing import Any
from .. import Layer
from .attention import MultiheadAttention
from ..util import Sequential
from ..util import Linear
from ..util import Activation
from tensor_array.core import ReLU  # assumed import location for the ReLU op


class TransformerEncoderImpl(Layer):
    def __init__(self, d_model, n_head, ff_size) -> None:
        super().__init__()
        # Position-wise feed-forward block: expand to ff_size, apply ReLU, project back to d_model.
        self.feed_forward = Sequential(OrderedDict([
            ('linear_1', Linear(ff_size)),
            ('activation', Activation(ReLU)),
            ('linear_2', Linear(d_model)),
        ]))
        self.multihead_attn = MultiheadAttention(d_model, n_head)
        # Placeholder identity layers; swap in a real layer-normalization layer once one is available.
        self.layer_norm_1 = Activation(lambda t: t)
        self.layer_norm_2 = Activation(lambda t: t)

    def calculate(self, input) -> Any:
        # Self-attention sub-layer with a residual connection.
        attn_output = self.multihead_attn(input, input, input)
        attn_output = self.layer_norm_1(input + attn_output)
        # Feed-forward sub-layer with a residual connection.
        ff_output = self.feed_forward(attn_output)
        return self.layer_norm_2(attn_output + ff_output)
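
A hedged usage sketch for the encoder block. The sizes are illustrative, and TransformerEncoderImpl is assumed to be in scope because the commit view does not show this file's path:

from tensor_array.core import zeros, DataType

encoder = TransformerEncoderImpl(d_model = 64, n_head = 8, ff_size = 256)
x = zeros(shape = (2, 10, 64), dtype = DataType.FLOAT)   # (batch, seq_len, d_model)
y = encoder(x)                                           # keeps the (2, 10, 64) shape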

src/tensor_array/layers/layer.py

Lines changed: 4 additions & 4 deletions
@@ -21,17 +21,17 @@ def __init__(self) -> None:
 
     def __call__(self, *args: Any, **kwds: Any) -> Any:
         if not self.__dict__['is_running']:
-            list_arg = ((t.shape(), t.dtype()) for t in args if isinstance(t, Tensor))
+            list_arg = (t.shape() for t in args if isinstance(t, Tensor))
             dict_kwargs = {
-                key: (val.shape(), val.dtype())
+                key: val.shape()
                 for key, val in kwds.items()
                 if isinstance(val, Tensor)
             }
-            self.init_value(*list_arg, **dict_kwargs)
+            self.layer_init(*list_arg, **dict_kwargs)
             super().__setattr__('is_running', True)
         return self.calculate(*args, **kwds)
 
-    def init_value(self, *args: Tuple, **kwds: Tuple) -> None:
+    def layer_init(self, *args: Tuple, **kwds: Tuple) -> None:
         pass
 
     def calculate(self, *args: Any, **kwds: Any) -> Any:
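
To illustrate the renamed hook, here is a hedged sketch of a bias-only layer built on the new contract, in which layer_init receives just the input shapes. AddBias is hypothetical, and the public import path for Layer and Parameter is assumed from the relative imports used elsewhere in the package:

from typing import Any, Tuple
from tensor_array.core import zeros, DataType
from tensor_array.layers import Layer, Parameter   # assumed public re-exports

class AddBias(Layer):
    def layer_init(self, shape: Tuple) -> None:
        # Only the input shape arrives here; the dtype is fixed by the layer itself.
        self.b = Parameter(zeros(shape = (shape[-1],), dtype = DataType.FLOAT))

    def calculate(self, t) -> Any:
        return t + self.b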

Lines changed: 3 additions & 1 deletion
@@ -1 +1,3 @@
-from tensor_array.layers.util.linear import Linear
+from tensor_array.layers.util.activation import Activation
+from tensor_array.layers.util.linear import Linear
+from tensor_array.layers.util.sequential import Sequential

Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
from .. import Layer
from typing import Any, Callable

class Activation(Layer):
    def __init__(self, activation_function: Callable) -> None:
        super().__init__()
        self.activation_function = activation_function

    def calculate(self, *args: Any, **kwds: Any) -> Any:
        return self.activation_function(*args, **kwds)
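
A small hedged sketch of the wrapper in use; the no-op lambda is an illustrative stand-in for a real activation function:

from tensor_array.core import zeros, DataType
from tensor_array.layers.util.activation import Activation

identity = Activation(lambda t: t)                 # any callable over a Tensor works
x = zeros(shape = (2, 3), dtype = DataType.FLOAT)
y = identity(x)                                    # returns x unchanged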

src/tensor_array/layers/util/linear.py

Lines changed: 2 additions & 3 deletions
@@ -12,9 +12,8 @@ def __init__(self, bias) -> None:
         self.bias_shape = bias
         self.b = Parameter(zeros(shape = (bias,), dtype = DataType.FLOAT))
 
-    def init_value(self, t):
-        shape, dtype = t
-        self.w = Parameter(zeros(shape = (shape[-1], self.bias_shape), dtype = dtype))
+    def layer_init(self, t):
+        self.w = Parameter(zeros(shape = (t[-1], self.bias_shape), dtype = DataType.FLOAT))
 
     def calculate(self, t):
         return t @ self.w + self.b
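
A hedged sketch of the lazy path after this change: the weight matrix is now built from the input's last dimension alone, with the dtype fixed to FLOAT. The sizes below are illustrative:

from tensor_array.core import zeros, DataType
from tensor_array.layers.util import Linear

layer = Linear(4)                                   # 4 output features
x = zeros(shape = (2, 8), dtype = DataType.FLOAT)
y = layer(x)          # first call runs layer_init((2, 8)) and creates w with shape (8, 4)
print(y.shape())      # expected: (2, 4)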

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
from .. import Layer
from .. import Parameter
from tensor_array.core import Tensor
from tensor_array.core import zeros
from tensor_array.core import DataType
from typing import Any, List, OrderedDict


class Sequential(Layer):
    def __init__(self, _layers: OrderedDict[str, Layer]) -> None:
        super().__init__()
        self._layers = _layers

    def calculate(self, t):
        # Feed the input through each named sub-layer in insertion order.
        tensorloop = t
        for _, content in self._layers.items():
            tensorloop = content(tensorloop)
        return tensorloop
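
A hedged sketch that feeds a tensor through a two-layer pipeline built as the OrderedDict the constructor expects; the layer names and sizes are illustrative:

from collections import OrderedDict
from tensor_array.core import zeros, DataType
from tensor_array.layers.util import Linear, Sequential

model = Sequential(OrderedDict([
    ('hidden', Linear(16)),
    ('output', Linear(4)),
]))
x = zeros(shape = (2, 8), dtype = DataType.FLOAT)
y = model(x)          # runs x through 'hidden' then 'output'
print(y.shape())      # expected: (2, 4)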
