From 51d22e610cb96fde23ddefc5a46fa2284bc09aef Mon Sep 17 00:00:00 2001
From: Kostadin Cvejoski
Date: Tue, 8 Apr 2025 21:51:06 +0200
Subject: [PATCH] set the dtype to float32 during tokenization when no padding
 is needed

---
 easy_tpp/preprocess/event_tokenizer.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/easy_tpp/preprocess/event_tokenizer.py b/easy_tpp/preprocess/event_tokenizer.py
index b863dac..5253906 100644
--- a/easy_tpp/preprocess/event_tokenizer.py
+++ b/easy_tpp/preprocess/event_tokenizer.py
@@ -414,9 +414,9 @@ def _pad(
                 max_len=max_length,
                 dtype=np.int64)
         else:
-            batch_output[self.model_input_names[0]] = np.array(encoded_inputs[self.model_input_names[0]])
-            batch_output[self.model_input_names[1]] = np.array(encoded_inputs[self.model_input_names[1]])
-            batch_output[self.model_input_names[2]] = np.array(encoded_inputs[self.model_input_names[2]])
+            batch_output[self.model_input_names[0]] = np.array(encoded_inputs[self.model_input_names[0]], dtype=np.float32)
+            batch_output[self.model_input_names[1]] = np.array(encoded_inputs[self.model_input_names[1]], dtype=np.float32)
+            batch_output[self.model_input_names[2]] = np.array(encoded_inputs[self.model_input_names[2]], dtype=np.int64)
 
             # non_pad_mask; replaced the use of event types by using the original sequence length
             seq_pad_mask = np.full_like(batch_output[self.model_input_names[2]], fill_value=True, dtype=bool)
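
Note on the change: without an explicit `dtype`, `np.array` infers `float64` for Python floats, and `torch.from_numpy` preserves that dtype, which can clash with PyTorch model weights (float32 by default). The sketch below is a minimal illustration of that behavior, not code from the patched file; the `time_seqs` data is invented for the example.

```python
import numpy as np
import torch

# Hypothetical batch of event timestamps (Python floats).
time_seqs = [[0.0, 1.3, 2.7], [0.0, 0.5, 4.1]]

# NumPy infers float64 by default; the patch pins float32 instead.
arr64 = np.array(time_seqs)                    # dtype: float64
arr32 = np.array(time_seqs, dtype=np.float32)  # dtype: float32

# torch.from_numpy keeps the NumPy dtype, so the float64 array becomes a
# DoubleTensor, which would mismatch float32 nn.Module parameters.
t64 = torch.from_numpy(arr64)  # torch.float64
t32 = torch.from_numpy(arr32)  # torch.float32, matches default model weights

print(arr64.dtype, arr32.dtype, t64.dtype, t32.dtype)
```

This mirrors what the padded branch already does via its explicit `dtype` arguments: float inputs (e.g. event times) are coerced to float32, while the integer input (event types) stays int64.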