Commit d4082af

cleaned scripts
1 parent 594f3a1 commit d4082af

File tree

README.md
bin/cnn_filter.py

2 files changed: +7 -52 lines changed

README.md

Lines changed: 1 addition & 1 deletion

@@ -167,7 +167,7 @@ nohup bash bin/bert_multiclass.sh hum-event-aware data/event_aware_en/crisis_con
 
 ```
 
-## Please cite the following papera if you are using the data:
+## Please cite the following paper if you are using the data:
 
 * *Firoj Alam, Hassan Sajjad, Muhammad Imran and Ferda Ofli, "Standardizing and Benchmarking Crisis-related Social Media Datasets for Humanitarian Information Processing", arxiv. 2020.*
 
bin/cnn_filter.py

Lines changed: 6 additions & 51 deletions
@@ -1,8 +1,7 @@
 # -*- coding: utf-8 -*-
 """
-Created on Sat Mar 4 20:42:24 2017
-
-@author: firojalam
+@author: Firoj Alam
+@date: Last update, Mar 31, 2020
 """
 
 
@@ -16,29 +15,12 @@
 import numpy as np
 np.random.seed(1337)
 
-from keras.preprocessing.text import Tokenizer
-from keras.preprocessing.sequence import pad_sequences
-from keras.utils.np_utils import to_categorical
-from keras.layers import Dense, Input, Dropout, Activation, Flatten
 from keras.layers import Conv1D, MaxPooling1D, Embedding
-from keras.models import Model
-import sys
-from sklearn import preprocessing
-import pandas as pd
-import sklearn.metrics as metrics
-import data_process
-from keras.models import Sequential
-from keras.layers import Convolution1D, GlobalMaxPooling1D
-import subprocess
 import shlex
-from subprocess import Popen, PIPE
-import keras.backend as K
-#from imblearn.over_sampling import SMOTE
+from subprocess import Popen, PIPE
 from collections import Counter
 import random
-from keras.layers import merge
-from keras.layers.normalization import BatchNormalization
-from keras.layers import concatenate
+from keras.layers import concatenate
 from keras.constraints import max_norm
 from keras.layers import Input, Dense, Embedding, Conv2D, MaxPool2D, MaxPooling2D
 from keras.layers import Reshape, Flatten, Dropout, Concatenate
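Most of the removed imports were simply unused, but `shlex` and `Popen`/`PIPE` survive the cleanup, which suggests the script still shells out to an external command somewhere outside these hunks. A hedged illustration of that retained pattern (the command below is purely illustrative, not taken from cnn_filter.py):

```python
import shlex
from subprocess import Popen, PIPE

def run_command(cmd_line):
    """Tokenize a shell command with shlex and capture stdout/stderr."""
    proc = Popen(shlex.split(cmd_line), stdout=PIPE, stderr=PIPE)
    out, err = proc.communicate()
    return proc.returncode, out.decode("utf-8"), err.decode("utf-8")

# Illustrative call only; cnn_filter.py's real command is outside this diff.
status, out, err = run_command("wc -l README.md")
print(status, out.strip())
```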
@@ -90,8 +72,6 @@ def text_cnn(embedding_matrix,word_index,MAX_NB_WORDS,EMBEDDING_DIM,MAX_SEQUENCE
     # embedding_layer=Embedding(output_dim=EMBEDDING_DIM, input_dim=nb_words, input_length=MAX_SEQUENCE_LENGTH,trainable=False)(inputs)
 
     ########## CNN: Filtering with Max pooling:
-    #nb_filter = 250
-    #filter_length = 3
     branches = [] # models to be merged
     filter_window_sizes=[2,3,4,5]
     pool_size=2
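The kept context lines outline `text_cnn`'s architecture: a list of `branches`, one per window size in `filter_window_sizes=[2,3,4,5]`, each max-pooled with `pool_size=2` and then merged. A minimal sketch of that pattern against the same old-style Keras API; `nb_filter=250` comes from the comment deleted just above, while the `Flatten`/`Dense` head is an assumption, since it sits outside the hunk:

```python
from keras.layers import Input, Embedding, Conv1D, MaxPooling1D, Flatten, Dense, concatenate
from keras.models import Model

def text_cnn_sketch(embedding_matrix, nb_words, EMBEDDING_DIM, MAX_SEQUENCE_LENGTH,
                    n_classes, filter_window_sizes=(2, 3, 4, 5), nb_filter=250, pool_size=2):
    inputs = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    embedded = Embedding(output_dim=EMBEDDING_DIM, input_dim=nb_words,
                         weights=[embedding_matrix],
                         input_length=MAX_SEQUENCE_LENGTH,
                         trainable=False)(inputs)
    branches = []  # one Conv1D branch per filter window size, as in the kept context
    for window in filter_window_sizes:
        branch = Conv1D(nb_filter, window, activation='relu')(embedded)
        branch = MaxPooling1D(pool_size=pool_size)(branch)
        branch = Flatten()(branch)
        branches.append(branch)
    merged = concatenate(branches)
    preds = Dense(n_classes, activation='softmax')(merged)  # assumed classification head
    model = Model(inputs=inputs, outputs=preds)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
```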
@@ -157,40 +137,16 @@ def kimCNN(embedding_matrix,word_index,MAX_NB_WORDS,EMBEDDING_DIM,MAX_SEQUENCE_L
         compiled keras model
     """
     print('Preparing embedding matrix.')
-    # num_words = min(MAX_NB_WORDS, len(word_index))
-    # nb_words = min(MAX_NB_WORDS, len(word_index) + 1)
-    # embedding_matrix = np.zeros((num_words, EMBEDDING_DIM))
-    # for word, i in word_index.items():
-    #     if i >= MAX_NB_WORDS:
-    #         continue
-    #     embedding_vector = embeddings_index.get(word)
-    #     if embedding_vector is not None:
-    #         # words not found in embedding index will be all-zeros.
-    #         embedding_matrix[i] = embedding_vector
-
-    # embedding_layer = Embedding(nb_words,
-    #                             EMBEDDING_DIM,
-    #                             weights=[embedding_matrix],
-    #                             input_length=MAX_SEQUENCE_LENGTH,
-    #                             trainable=True)
     nb_words = min(MAX_NB_WORDS, len(word_index)+1)
     embedding_layer = Embedding(output_dim=EMBEDDING_DIM, input_dim=nb_words, weights=[embedding_matrix], input_length=MAX_SEQUENCE_LENGTH,trainable=True)
 
-
-
-    print('Training model.')
-
-    # sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
     embedded_sequences = embedding_layer(sequence_input)
     print(embedded_sequences.shape)
-
-
     # add first conv filter
     embedded_sequences = Reshape((MAX_SEQUENCE_LENGTH, EMBEDDING_DIM, 1))(embedded_sequences)
     x = Conv2D(300, (5, EMBEDDING_DIM), activation='relu')(embedded_sequences)
     x = MaxPool2D((MAX_SEQUENCE_LENGTH - 5 + 1, 1))(x)
 
-
     # add second conv filter.
     y = Conv2D(300, (4, EMBEDDING_DIM), activation='relu')(embedded_sequences)
     y = MaxPool2D((MAX_SEQUENCE_LENGTH - 4 + 1, 1))(y)
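The commented-out block removed in this hunk still documents how `embedding_matrix` is expected to be built before it reaches `kimCNN`. Reassembled as a runnable helper, as a sketch: `embeddings_index` is assumed to be a word-to-vector dict loaded from pre-trained embeddings elsewhere in the pipeline:

```python
import numpy as np

def build_embedding_matrix(word_index, embeddings_index, max_nb_words, embedding_dim):
    """Row i holds the pre-trained vector for the word with tokenizer index i.

    Words missing from embeddings_index keep an all-zeros row, exactly as the
    deleted comment block describes.
    """
    nb_words = min(max_nb_words, len(word_index) + 1)
    embedding_matrix = np.zeros((nb_words, embedding_dim))
    for word, i in word_index.items():
        if i >= nb_words:
            continue  # index outside the capped vocabulary
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
    return embedding_matrix
```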
@@ -207,9 +163,8 @@ def kimCNN(embedding_matrix,word_index,MAX_NB_WORDS,EMBEDDING_DIM,MAX_SEQUENCE_L
     # add third conv filter.
     w1 = Conv2D(300, (1, EMBEDDING_DIM), activation='relu')(embedded_sequences)
     w1 = MaxPool2D((MAX_SEQUENCE_LENGTH - 1 + 1, 1))(w1)
-    # concate the conv layers
-    # alpha = concatenate([x,y,z,z1])
-    alpha = concatenate([w1,z1 ])
+
+    alpha = concatenate([w1,z1])
 
     # flatted the pooled features.
     merged_model = Flatten()(alpha)
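Across the two `kimCNN` hunks the shape of the final model is visible: embeddings reshaped to a 4-D tensor, parallel `Conv2D` filters spanning the full embedding width, each max-pooled over all remaining positions, then concatenated and flattened. A minimal end-to-end sketch; filter widths 5, 4, and 1 appear in the diff, while widths 3 and 2 (the unseen `z` and `z1` branches) and the `Dropout`/`Dense` head are assumptions, since they fall outside the hunks:

```python
from keras.layers import (Input, Embedding, Reshape, Conv2D, MaxPool2D,
                          Flatten, Dropout, Dense, concatenate)
from keras.models import Model

def kim_cnn_sketch(embedding_matrix, nb_words, EMBEDDING_DIM, MAX_SEQUENCE_LENGTH, n_classes):
    sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    embedded = Embedding(output_dim=EMBEDDING_DIM, input_dim=nb_words,
                         weights=[embedding_matrix],
                         input_length=MAX_SEQUENCE_LENGTH,
                         trainable=True)(sequence_input)
    # Conv2D needs a channels axis: (time, embedding_dim, 1)
    embedded = Reshape((MAX_SEQUENCE_LENGTH, EMBEDDING_DIM, 1))(embedded)
    pooled = []
    for width in (5, 4, 3, 2, 1):  # 5, 4 and 1 are visible in the diff; 3 and 2 are assumed
        conv = Conv2D(300, (width, EMBEDDING_DIM), activation='relu')(embedded)
        # pooling over every valid window position leaves a (1, 1, 300) tensor
        pooled.append(MaxPool2D((MAX_SEQUENCE_LENGTH - width + 1, 1))(conv))
    alpha = concatenate(pooled)          # the diff's alpha = concatenate([...])
    merged_model = Flatten()(alpha)      # the diff's merged_model = Flatten()(alpha)
    merged_model = Dropout(0.5)(merged_model)  # assumed regularization head
    preds = Dense(n_classes, activation='softmax')(merged_model)
    model = Model(inputs=sequence_input, outputs=preds)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
```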
