@@ -1,8 +1,7 @@
 # -*- coding: utf-8 -*-
 """
-Created on Sat Mar 4 20:42:24 2017
-
-@author: firojalam
+@author: Firoj Alam
+@date: Last update, Mar 31, 2020
 """
 
 
@@ -16,29 +15,12 @@
 import numpy as np
 np.random.seed(1337)
 
-from keras.preprocessing.text import Tokenizer
-from keras.preprocessing.sequence import pad_sequences
-from keras.utils.np_utils import to_categorical
-from keras.layers import Dense, Input, Dropout, Activation, Flatten
 from keras.layers import Conv1D, MaxPooling1D, Embedding
-from keras.models import Model
-import sys
-from sklearn import preprocessing
-import pandas as pd
-import sklearn.metrics as metrics
-import data_process
-from keras.models import Sequential
-from keras.layers import Convolution1D, GlobalMaxPooling1D
-import subprocess
 import shlex
-from subprocess import Popen, PIPE
-import keras.backend as K
-#from imblearn.over_sampling import SMOTE
+from subprocess import Popen, PIPE
 from collections import Counter
 import random
-from keras.layers import merge
-from keras.layers.normalization import BatchNormalization
-from keras.layers import concatenate
+from keras.layers import concatenate
 from keras.constraints import max_norm
 from keras.layers import Input, Dense, Embedding, Conv2D, MaxPool2D, MaxPooling2D
 from keras.layers import Reshape, Flatten, Dropout, Concatenate
@@ -90,8 +72,6 @@ def text_cnn(embedding_matrix,word_index,MAX_NB_WORDS,EMBEDDING_DIM,MAX_SEQUENCE
     # embedding_layer=Embedding(output_dim=EMBEDDING_DIM, input_dim=nb_words, input_length=MAX_SEQUENCE_LENGTH,trainable=False)(inputs)
 
     ########## CNN: Filtering with Max pooling:
-    #nb_filter = 250
-    #filter_length = 3
     branches = []  # models to be merged
     filter_window_sizes = [2, 3, 4, 5]
     pool_size = 2
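
Note: the remainder of text_cnn is not shown in this hunk. As a minimal sketch only, here is the usual multi-window Conv1D pattern that branches, filter_window_sizes, and pool_size suggest; the sequence_input placeholder, the filter count of 250, and the dense head are assumptions, not taken from this commit:

# Hypothetical sketch -- not the repository's actual text_cnn implementation.
from keras.layers import Input, Embedding, Conv1D, MaxPooling1D, Flatten, Dense, concatenate
from keras.models import Model

def text_cnn_sketch(embedding_matrix, word_index, MAX_NB_WORDS, EMBEDDING_DIM,
                    MAX_SEQUENCE_LENGTH, num_classes):
    nb_words = min(MAX_NB_WORDS, len(word_index) + 1)
    sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')  # assumed input
    embedded = Embedding(output_dim=EMBEDDING_DIM, input_dim=nb_words,
                         weights=[embedding_matrix],
                         input_length=MAX_SEQUENCE_LENGTH)(sequence_input)

    branches = []  # one convolution/pooling branch per filter window size
    filter_window_sizes = [2, 3, 4, 5]
    pool_size = 2
    for window in filter_window_sizes:
        branch = Conv1D(250, window, activation='relu')(embedded)  # 250 filters: assumed
        branch = MaxPooling1D(pool_size=pool_size)(branch)
        branch = Flatten()(branch)
        branches.append(branch)

    merged = concatenate(branches)
    preds = Dense(num_classes, activation='softmax')(merged)  # assumed classifier head
    return Model(sequence_input, preds)
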
@@ -157,40 +137,16 @@ def kimCNN(embedding_matrix,word_index,MAX_NB_WORDS,EMBEDDING_DIM,MAX_SEQUENCE_L
         compiled keras model
     """
     print('Preparing embedding matrix.')
-    # num_words = min(MAX_NB_WORDS, len(word_index))
-    # nb_words = min(MAX_NB_WORDS, len(word_index) + 1)
-    # embedding_matrix = np.zeros((num_words, EMBEDDING_DIM))
-    # for word, i in word_index.items():
-    #     if i >= MAX_NB_WORDS:
-    #         continue
-    #     embedding_vector = embeddings_index.get(word)
-    #     if embedding_vector is not None:
-    #         # words not found in embedding index will be all-zeros.
-    #         embedding_matrix[i] = embedding_vector
-
-    # embedding_layer = Embedding(nb_words,
-    #                             EMBEDDING_DIM,
-    #                             weights=[embedding_matrix],
-    #                             input_length=MAX_SEQUENCE_LENGTH,
-    #                             trainable=True)
     nb_words = min(MAX_NB_WORDS, len(word_index) + 1)
     embedding_layer = Embedding(output_dim=EMBEDDING_DIM, input_dim=nb_words, weights=[embedding_matrix], input_length=MAX_SEQUENCE_LENGTH, trainable=True)
 
-
-
-    print('Training model.')
-
-    # sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
     embedded_sequences = embedding_layer(sequence_input)
     print(embedded_sequences.shape)
-
-
     # add first conv filter
     embedded_sequences = Reshape((MAX_SEQUENCE_LENGTH, EMBEDDING_DIM, 1))(embedded_sequences)
     x = Conv2D(300, (5, EMBEDDING_DIM), activation='relu')(embedded_sequences)
     x = MaxPool2D((MAX_SEQUENCE_LENGTH - 5 + 1, 1))(x)
 
-
     # add second conv filter.
     y = Conv2D(300, (4, EMBEDDING_DIM), activation='relu')(embedded_sequences)
     y = MaxPool2D((MAX_SEQUENCE_LENGTH - 4 + 1, 1))(y)
@@ -207,9 +163,8 @@ def kimCNN(embedding_matrix,word_index,MAX_NB_WORDS,EMBEDDING_DIM,MAX_SEQUENCE_L
     # add third conv filter.
     w1 = Conv2D(300, (1, EMBEDDING_DIM), activation='relu')(embedded_sequences)
     w1 = MaxPool2D((MAX_SEQUENCE_LENGTH - 1 + 1, 1))(w1)
-    # concate the conv layers
-    # alpha = concatenate([x,y,z,z1])
-    alpha = concatenate([w1, z1])
+
+    alpha = concatenate([w1, z1])
 
     # flatted the pooled features.
     merged_model = Flatten()(alpha)
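
Note: read together, the hunks above show a Kim-style CNN head: the embedded sequence is reshaped to (MAX_SEQUENCE_LENGTH, EMBEDDING_DIM, 1), parallel Conv2D filters whose width spans the whole embedding dimension are applied, each feature map is max-pooled over all remaining positions, and the pooled outputs are concatenated and flattened. A minimal standalone sketch of that pattern follows; the filter-height list, the number of classes, and the dense head are illustrative assumptions where not visible in this commit:

# Hypothetical sketch of the Kim (2014)-style pattern used in kimCNN above;
# names and the classifier head are assumptions, not the repository's code.
from keras.layers import Input, Embedding, Reshape, Conv2D, MaxPool2D, Flatten, Dense, concatenate
from keras.models import Model

def kim_cnn_sketch(embedding_matrix, nb_words, EMBEDDING_DIM, MAX_SEQUENCE_LENGTH, num_classes):
    sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    emb = Embedding(output_dim=EMBEDDING_DIM, input_dim=nb_words,
                    weights=[embedding_matrix], input_length=MAX_SEQUENCE_LENGTH,
                    trainable=True)(sequence_input)
    emb = Reshape((MAX_SEQUENCE_LENGTH, EMBEDDING_DIM, 1))(emb)  # add channel axis for Conv2D

    pooled = []
    for k in [5, 4, 1]:  # filter heights visible in the diff; the full file may use others
        c = Conv2D(300, (k, EMBEDDING_DIM), activation='relu')(emb)  # filter width = embedding dim
        p = MaxPool2D((MAX_SEQUENCE_LENGTH - k + 1, 1))(c)           # max over all remaining positions
        pooled.append(p)

    alpha = concatenate(pooled)        # concatenate the pooled feature maps
    merged_model = Flatten()(alpha)    # flatten the pooled features
    preds = Dense(num_classes, activation='softmax')(merged_model)   # assumed output layer
    return Model(sequence_input, preds)
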