from typing import Any
from .. import Layer
from ..util import Linear
from tensor_array.core import Tensor

def scaled_dot_product_attention(q, k, v, mask=None):
    # Attention scores: q @ k^T over the last two dimensions.
    attn_scores = q @ k.transpose(len(k.shape()) - 2, len(k.shape()) - 1)
    # NOTE: SoftMax is not imported in this file; it is assumed to be provided
    # elsewhere in the package. The `mask` argument is accepted but not yet applied.
    attn_probs = SoftMax(attn_scores, len(attn_scores.shape()) - 1)
    return attn_probs @ v

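# For reference, textbook scaled dot-product attention computes
#     softmax(Q @ K^T / sqrt(d_k)) @ V,
# adding a large negative value to masked-out score positions before the
# softmax. The helper above currently omits the 1 / sqrt(d_k) scaling and
# does not apply `mask`.
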
class MultiheadAttention(Layer):
    def __init__(self, d_model, n_head) -> None:
        super().__init__()
        # One linear projection each for queries, keys, values, and the output.
        self.linear_q = Linear(d_model)
        self.linear_k = Linear(d_model)
        self.linear_v = Linear(d_model)
        self.linear_o = Linear(d_model)
        self.n_head = n_head

    def calculate(self, input_q, input_k, input_v, mask=None) -> Any:
        # Project the inputs into query, key, and value spaces.
        temp_q = self.linear_q(input_q)
        temp_k = self.linear_k(input_k)
        temp_v = self.linear_v(input_v)

        # Per-head feature size; d_model is assumed to be divisible by n_head.
        d_head = temp_q.shape()[-1] // self.n_head

        # Split heads: (batch, seq, d_model) -> (batch, n_head, seq, d_head).
        temp_q = temp_q.reshape((temp_q.shape()[0], temp_q.shape()[1], self.n_head, d_head)).transpose(1, 2)
        temp_k = temp_k.reshape((temp_k.shape()[0], temp_k.shape()[1], self.n_head, d_head)).transpose(1, 2)
        temp_v = temp_v.reshape((temp_v.shape()[0], temp_v.shape()[1], self.n_head, d_head)).transpose(1, 2)

        attention_output = scaled_dot_product_attention(temp_q, temp_k, temp_v, mask)

        # Merge heads back: (batch, n_head, seq, d_head) -> (batch, seq, n_head * d_head).
        attention_output = attention_output.transpose(1, 2)
        out_shape = attention_output.shape()
        attention_output = attention_output.reshape((out_shape[0], out_shape[1], out_shape[2] * out_shape[3]))
        return self.linear_o(attention_output)
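
# A minimal NumPy walkthrough of the head split / attention / merge pattern used
# in `calculate` above. This is an illustrative sketch only: it does not use
# tensor_array, and the batch/sequence/model sizes below are arbitrary
# assumptions chosen for the demo.
if __name__ == "__main__":
    import numpy as np

    batch, seq_len, d_model, n_head = 2, 5, 16, 4
    d_head = d_model // n_head

    x = np.random.rand(batch, seq_len, d_model)

    # Split heads: (batch, seq, d_model) -> (batch, n_head, seq, d_head).
    split = x.reshape(batch, seq_len, n_head, d_head).transpose(0, 2, 1, 3)

    # Scaled dot-product self-attention per head: scores are (batch, n_head, seq, seq).
    scores = split @ split.transpose(0, 1, 3, 2) / np.sqrt(d_head)
    probs = np.exp(scores - scores.max(axis=-1, keepdims=True))
    probs /= probs.sum(axis=-1, keepdims=True)
    out = probs @ split  # (batch, n_head, seq, d_head)

    # Merge heads back: (batch, n_head, seq, d_head) -> (batch, seq, d_model).
    merged = out.transpose(0, 2, 1, 3).reshape(batch, seq_len, d_model)
    assert merged.shape == (batch, seq_len, d_model)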