|
10 | 10 | }, |
11 | 11 | { |
12 | 12 | "cell_type": "code", |
13 | | - "execution_count": 1, |
| 13 | + "execution_count": 2, |
14 | 14 | "metadata": {}, |
15 | 15 | "outputs": [ |
16 | 16 | { |
17 | 17 | "name": "stdout", |
18 | 18 | "output_type": "stream", |
19 | 19 | "text": [ |
20 | | - "Requirement already satisfied: nltk in /home/etherealenvy/.local/lib/python3.6/site-packages (3.5)\n", |
21 | | - "Requirement already satisfied: tqdm in /home/etherealenvy/.local/lib/python3.6/site-packages (from nltk) (4.46.0)\n", |
22 | | - "Requirement already satisfied: click in /home/etherealenvy/.local/lib/python3.6/site-packages (from nltk) (7.1.2)\n", |
23 | | - "Requirement already satisfied: joblib in /home/etherealenvy/.local/lib/python3.6/site-packages (from nltk) (0.14.1)\n", |
24 | | - "Requirement already satisfied: regex in /home/etherealenvy/.local/lib/python3.6/site-packages (from nltk) (2020.4.4)\n", |
25 | | - "Requirement already satisfied: gensim in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (3.8.3)\n", |
26 | | - "Requirement already satisfied: smart-open>=1.8.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from gensim) (2.0.0)\n", |
27 | | - "Requirement already satisfied: six>=1.5.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from gensim) (1.14.0)\n", |
28 | | - "Requirement already satisfied: numpy>=1.11.3 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from gensim) (1.18.4)\n", |
29 | | - "Requirement already satisfied: scipy>=0.18.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from gensim) (1.4.1)\n", |
30 | | - "Requirement already satisfied: requests in /home/etherealenvy/.local/lib/python3.6/site-packages (from smart-open>=1.8.1->gensim) (2.23.0)\n", |
31 | | - "Requirement already satisfied: boto in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from smart-open>=1.8.1->gensim) (2.49.0)\n", |
32 | | - "Requirement already satisfied: boto3 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from smart-open>=1.8.1->gensim) (1.13.4)\n", |
33 | | - "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests->smart-open>=1.8.1->gensim) (1.25.9)\n", |
34 | | - "Requirement already satisfied: certifi>=2017.4.17 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests->smart-open>=1.8.1->gensim) (2020.4.5.1)\n", |
35 | | - "Requirement already satisfied: chardet<4,>=3.0.2 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests->smart-open>=1.8.1->gensim) (3.0.4)\n", |
36 | | - "Requirement already satisfied: idna<3,>=2.5 in /home/etherealenvy/.local/lib/python3.6/site-packages (from requests->smart-open>=1.8.1->gensim) (2.9)\n", |
37 | | - "Requirement already satisfied: jmespath<1.0.0,>=0.7.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from boto3->smart-open>=1.8.1->gensim) (0.9.5)\n", |
38 | | - "Requirement already satisfied: s3transfer<0.4.0,>=0.3.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from boto3->smart-open>=1.8.1->gensim) (0.3.3)\n", |
39 | | - "Requirement already satisfied: botocore<1.17.0,>=1.16.4 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from boto3->smart-open>=1.8.1->gensim) (1.16.4)\n", |
40 | | - "Requirement already satisfied: docutils<0.16,>=0.10 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from botocore<1.17.0,>=1.16.4->boto3->smart-open>=1.8.1->gensim) (0.15.2)\n", |
41 | | - "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.6/site-packages (from botocore<1.17.0,>=1.16.4->boto3->smart-open>=1.8.1->gensim) (2.8.1)\n" |
| 20 | + "Requirement already satisfied: gensim in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (4.0.1)\n", |
| 21 | + "Requirement already satisfied: scipy>=0.18.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (1.6.3)\n", |
| 22 | + "Requirement already satisfied: Cython==0.29.21 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (0.29.21)\n", |
| 23 | + "Requirement already satisfied: smart-open>=1.8.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (5.0.0)\n", |
| 24 | + "Requirement already satisfied: numpy>=1.11.3 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (1.20.2)\n" |
| 25 | + ] |
| 26 | + }, |
| 27 | + { |
| 28 | + "name": "stderr", |
| 29 | + "output_type": "stream", |
| 30 | + "text": [ |
| 31 | + "WARNING: You are using pip version 20.1.1; however, version 21.1.1 is available.\n", |
| 32 | + "You should consider upgrading via the 'c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\python.exe -m pip install --upgrade pip' command.\n" |
42 | 33 | ] |
43 | 34 | } |
44 | 35 | ], |
45 | 36 | "source": [ |
46 | | - "!pip install nltk\n", |
47 | | - "!pip install gensim" |
| 37 | + "# Import OS \n", |
| 38 | + "import os\n", |
| 39 | + "# For NLTK, virtual environments are highly recommended, and it requires Python versions higher than 3.5.\n", |
| 40 | + "!pip install gensim\n", |
| 41 | + "!pip install nltk" |
48 | 42 | ] |
49 | 43 | }, |
50 | 44 | { |
|
302 | 296 | " tokens = word_tokenize(textstring)\n", |
303 | 297 | " return [token.lower() for token in tokens if token.isalpha() and token not in stops]\n", |
304 | 298 | "\n", |
305 | | - "data_path = \"/home/etherealenvy/Downloads/booksummaries/booksummaries.txt\"\n", |
| 299 | + "# Path to the downloaded booksummaries.txt data set. __file__ is undefined inside a notebook kernel,\n", |
| 300 | + "# so the path is built from the working directory; update it if your download lives elsewhere.\n", |
| 301 | + "\n", |
| 302 | + "data_path = os.path.join(os.getcwd(), \"booksummaries\", \"booksummaries.txt\")\n", |
| 303 | + "\n", |
306 | 304 | "summaries = []\n", |
307 | 305 | "for line in open(data_path, encoding=\"utf-8\"):\n", |
308 | 306 | " temp = line.split(\"\\t\")\n", |
309 | 307 | " summaries.append(preprocess(temp[6]))\n", |
310 | 308 | "\n", |
311 | 309 | "# Create a dictionary representation of the documents.\n", |
| 310 | + "\n", |
312 | 311 | "dictionary = Dictionary(summaries)\n", |
| 312 | + "\n", |
313 | 313 | "# Filter infrequent or too frequent words.\n", |
| 314 | + "\n", |
314 | 315 | "dictionary.filter_extremes(no_below=10, no_above=0.5)\n", |
315 | 316 | "corpus = [dictionary.doc2bow(summary) for summary in summaries]\n", |
| 317 | + "\n", |
316 | 318 | "# Make an index-to-word dictionary.\n", |
| 319 | + "\n", |
317 | 320 | "temp = dictionary[0] # This is only to \"load\" the dictionary.\n", |
318 | 321 | "id2word = dictionary.id2token\n", |
| 322 | + "\n", |
319 | 323 | "#Train the topic model\n", |
| 324 | + "\n", |
320 | 325 | "model = LdaModel(corpus=corpus, id2word=id2word,iterations=400, num_topics=10)\n", |
321 | 326 | "top_topics = list(model.top_topics(corpus))\n", |
322 | 327 | "pprint(top_topics)\n" |
|
411 | 416 | { |
412 | 417 | "cell_type": "code", |
413 | 418 | "execution_count": 19, |
414 | | - "metadata": {}, |
| 419 | + "metadata": { |
| 420 | + "scrolled": true |
| 421 | + }, |
415 | 422 | "outputs": [ |
416 | 423 | { |
417 | 424 | "name": "stdout", |
|
437 | 444 | " \n", |
438 | 445 | "print(\"=\" * 20)" |
439 | 446 | ] |
440 | | - }, |
441 | | - { |
442 | | - "cell_type": "code", |
443 | | - "execution_count": null, |
444 | | - "metadata": {}, |
445 | | - "outputs": [], |
446 | | - "source": [] |
447 | | - }, |
448 | | - { |
449 | | - "cell_type": "code", |
450 | | - "execution_count": null, |
451 | | - "metadata": {}, |
452 | | - "outputs": [], |
453 | | - "source": [] |
454 | | - }, |
455 | | - { |
456 | | - "cell_type": "code", |
457 | | - "execution_count": null, |
458 | | - "metadata": {}, |
459 | | - "outputs": [], |
460 | | - "source": [] |
461 | 447 | } |
462 | 448 | ], |
463 | 449 | "metadata": { |
|
476 | 462 | "name": "python", |
477 | 463 | "nbconvert_exporter": "python", |
478 | 464 | "pygments_lexer": "ipython3", |
479 | | - "version": "3.6.10" |
| 465 | + "version": "3.7.9" |
480 | 466 | } |
481 | 467 | }, |
482 | 468 | "nbformat": 4, |
|
0 commit comments