Skip to content

Commit ad42029

Browse files
committed
NLP: Update Notebook 02 with windows compatibility
1 parent 0a29e67 commit ad42029

File tree

1 file changed

+35
-49
lines changed

1 file changed

+35
-49
lines changed

Ch7/02_TopicModelling.ipynb

Lines changed: 35 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -10,41 +10,35 @@
1010
},
1111
{
1212
"cell_type": "code",
13-
"execution_count": 1,
13+
"execution_count": 2,
1414
"metadata": {},
1515
"outputs": [
1616
{
1717
"name": "stdout",
1818
"output_type": "stream",
1919
"text": [
20-
"Requirement already satisfied: nltk in /home/etherealenvy/.local/lib/python3.6/site-packages (3.5)\n",
21-
"Requirement already satisfied: tqdm in /home/etherealenvy/.local/lib/python3.6/site-packages (from nltk) (4.46.0)\n",
22-
"Requirement already satisfied: click in /home/etherealenvy/.local/lib/python3.6/site-packages (from nltk) (7.1.2)\n",
23-
"Requirement already satisfied: joblib in /home/etherealenvy/.local/lib/python3.6/site-packages (from nltk) (0.14.1)\n",
24-
"Requirement already satisfied: regex in /home/etherealenvy/.local/lib/python3.6/site-packages (from nltk) (2020.4.4)\n",
25-
"Requirement already satisfied: gensim in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (3.8.3)\n",
26-
"Requirement already satisfied: smart-open>=1.8.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from gensim) (2.0.0)\n",
27-
"Requirement already satisfied: six>=1.5.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from gensim) (1.14.0)\n",
28-
"Requirement already satisfied: numpy>=1.11.3 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from gensim) (1.18.4)\n",
29-
"Requirement already satisfied: scipy>=0.18.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from gensim) (1.4.1)\n",
30-
"Requirement already satisfied: requests in /home/etherealenvy/.local/lib/python3.6/site-packages (from smart-open>=1.8.1->gensim) (2.23.0)\n",
31-
"Requirement already satisfied: boto in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from smart-open>=1.8.1->gensim) (2.49.0)\n",
32-
"Requirement already satisfied: boto3 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from smart-open>=1.8.1->gensim) (1.13.4)\n",
33-
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests->smart-open>=1.8.1->gensim) (1.25.9)\n",
34-
"Requirement already satisfied: certifi>=2017.4.17 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests->smart-open>=1.8.1->gensim) (2020.4.5.1)\n",
35-
"Requirement already satisfied: chardet<4,>=3.0.2 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests->smart-open>=1.8.1->gensim) (3.0.4)\n",
36-
"Requirement already satisfied: idna<3,>=2.5 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests->smart-open>=1.8.1->gensim) (2.9)\n",
37-
"Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from boto3->smart-open>=1.8.1->gensim) (0.9.5)\n",
38-
"Requirement already satisfied: s3transfer<0.4.0,>=0.3.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from boto3->smart-open>=1.8.1->gensim) (0.3.3)\n",
39-
"Requirement already satisfied: botocore<1.17.0,>=1.16.4 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from boto3->smart-open>=1.8.1->gensim) (1.16.4)\n",
40-
"Requirement already satisfied: docutils<0.16,>=0.10 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from botocore<1.17.0,>=1.16.4->boto3->smart-open>=1.8.1->gensim) (0.15.2)\n",
41-
"Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from botocore<1.17.0,>=1.16.4->boto3->smart-open>=1.8.1->gensim) (2.8.1)\n"
20+
"Requirement already satisfied: gensim in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (4.0.1)\n",
21+
"Requirement already satisfied: scipy>=0.18.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (1.6.3)\n",
22+
"Requirement already satisfied: Cython==0.29.21 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (0.29.21)\n",
23+
"Requirement already satisfied: smart-open>=1.8.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (5.0.0)\n",
24+
"Requirement already satisfied: numpy>=1.11.3 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (1.20.2)\n"
25+
]
26+
},
27+
{
28+
"name": "stderr",
29+
"output_type": "stream",
30+
"text": [
31+
"WARNING: You are using pip version 20.1.1; however, version 21.1.1 is available.\n",
32+
"You should consider upgrading via the 'c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\python.exe -m pip install --upgrade pip' command.\n"
4233
]
4334
}
4435
],
4536
"source": [
46-
"!pip install nltk\n",
47-
"!pip install gensim"
37+
"# Import OS \n",
38+
"import os\n",
39+
"# For NLTK, virtual environments are highly recommended; it requires Python versions higher than 3.5\n",
40+
"!pip install gensim\n",
41+
"!pip install nltk"
4842
]
4943
},
5044
{
@@ -302,21 +296,32 @@
302296
" tokens = word_tokenize(textstring)\n",
303297
" return [token.lower() for token in tokens if token.isalpha() and token not in stops]\n",
304298
"\n",
305-
"data_path = \"/home/etherealenvy/Downloads/booksummaries/booksummaries.txt\"\n",
299+
"# This is a sample path to your downloaded data set. It is currently set to a Windows-based path.\n",
300+
"# Please update it to your actual download path, regardless of your choice of operating system.\n",
301+
"\n",
302+
"# __file__ is not defined inside a notebook kernel, so resolve the data file against the current working directory.\n",
"data_path = os.path.join(os.getcwd(), \"booksummaries\", \"booksummaries.txt\")\n",
303+
"\n",
306304
"summaries = []\n",
307305
"for line in open(data_path, encoding=\"utf-8\"):\n",
308306
" temp = line.split(\"\\t\")\n",
309307
" summaries.append(preprocess(temp[6]))\n",
310308
"\n",
311309
"# Create a dictionary representation of the documents.\n",
310+
"\n",
312311
"dictionary = Dictionary(summaries)\n",
312+
"\n",
313313
"# Filter infrequent or too frequent words.\n",
314+
"\n",
314315
"dictionary.filter_extremes(no_below=10, no_above=0.5)\n",
315316
"corpus = [dictionary.doc2bow(summary) for summary in summaries]\n",
317+
"\n",
316318
"# Make an index-to-word dictionary.\n",
319+
"\n",
317320
"temp = dictionary[0] # This is only to \"load\" the dictionary.\n",
318321
"id2word = dictionary.id2token\n",
322+
"\n",
319323
"#Train the topic model\n",
324+
"\n",
320325
"model = LdaModel(corpus=corpus, id2word=id2word,iterations=400, num_topics=10)\n",
321326
"top_topics = list(model.top_topics(corpus))\n",
322327
"pprint(top_topics)\n"
@@ -411,7 +416,9 @@
411416
{
412417
"cell_type": "code",
413418
"execution_count": 19,
414-
"metadata": {},
419+
"metadata": {
420+
"scrolled": true
421+
},
415422
"outputs": [
416423
{
417424
"name": "stdout",
@@ -437,27 +444,6 @@
437444
" \n",
438445
"print(\"=\" * 20)"
439446
]
440-
},
441-
{
442-
"cell_type": "code",
443-
"execution_count": null,
444-
"metadata": {},
445-
"outputs": [],
446-
"source": []
447-
},
448-
{
449-
"cell_type": "code",
450-
"execution_count": null,
451-
"metadata": {},
452-
"outputs": [],
453-
"source": []
454-
},
455-
{
456-
"cell_type": "code",
457-
"execution_count": null,
458-
"metadata": {},
459-
"outputs": [],
460-
"source": []
461447
}
462448
],
463449
"metadata": {
@@ -476,7 +462,7 @@
476462
"name": "python",
477463
"nbconvert_exporter": "python",
478464
"pygments_lexer": "ipython3",
479-
"version": "3.6.10"
465+
"version": "3.7.9"
480466
}
481467
},
482468
"nbformat": 4,

0 commit comments

Comments
 (0)