|
9 | 9 | }, |
10 | 10 | { |
11 | 11 | "cell_type": "code", |
12 | | - "execution_count": null, |
| 12 | + "execution_count": 1, |
13 | 13 | "metadata": {}, |
14 | 14 | "outputs": [ |
15 | 15 | { |
16 | 16 | "name": "stdout", |
17 | 17 | "output_type": "stream", |
18 | 18 | "text": [ |
19 | | - "Collecting gensim\n", |
20 | | - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/1d/69/1262ed0050c21f5054702b8e96a2d8c310d4cd059e4a08c9a2fe6a5dae65/gensim-3.8.3-cp35-cp35m-manylinux1_x86_64.whl (24.2MB)\n", |
21 | | - "\u001b[K 100% |████████████████████████████████| 24.2MB 930kB/s ta 0:00:011 41% |█████████████▎ | 10.1MB 5.2MB/s eta 0:00:03\n", |
22 | | - "\u001b[?25hCollecting smart-open>=1.8.1 (from gensim)\n", |
23 | | - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/74/77/744c79da6e66691e3500b6dffff29bdd787015eae817d594791edc7b719b/smart_open-2.0.0.tar.gz (103kB)\n", |
24 | | - "\u001b[K 100% |████████████████████████████████| 112kB 3.4MB/s ta 0:00:01\n", |
25 | | - "\u001b[?25hRequirement already satisfied: six>=1.5.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from gensim) (1.14.0)\n", |
26 | | - "Collecting scipy>=0.18.1 (from gensim)\n", |
27 | | - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/c1/60/8cbf00c0deb50a971e6e3a015fb32513960a92867df979870a454481817c/scipy-1.4.1-cp35-cp35m-manylinux1_x86_64.whl (26.0MB)\n", |
28 | | - "\u001b[K 100% |████████████████████████████████| 26.0MB 1.0MB/s ta 0:00:011\n", |
29 | | - "\u001b[?25hCollecting numpy>=1.11.3 (from gensim)\n", |
30 | | - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/38/92/fa5295d9755c7876cb8490eab866e1780154033fa45978d9cf74ffbd4c68/numpy-1.18.4-cp35-cp35m-manylinux1_x86_64.whl (20.0MB)\n", |
31 | | - "\u001b[K 100% |████████████████████████████████| 20.0MB 1.8MB/s eta 0:00:01\n", |
32 | | - "\u001b[?25hRequirement already satisfied: requests in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from smart-open>=1.8.1->gensim) (2.23.0)\n", |
33 | | - "Collecting boto (from smart-open>=1.8.1->gensim)\n", |
34 | | - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/23/10/c0b78c27298029e4454a472a1919bde20cb182dab1662cec7f2ca1dcc523/boto-2.49.0-py2.py3-none-any.whl (1.4MB)\n", |
35 | | - "\u001b[K 100% |████████████████████████████████| 1.4MB 4.6MB/s eta 0:00:01\n", |
36 | | - "\u001b[?25hCollecting boto3 (from smart-open>=1.8.1->gensim)\n", |
37 | | - "\u001b[?25l Downloading https://files.pythonhosted.org/packages/36/9e/e963605983fc1188c200ce84e2e07a1882c84a9e4c71cba80076b21441bb/boto3-1.13.4-py2.py3-none-any.whl (128kB)\n", |
38 | | - "\u001b[K 100% |████████████████████████████████| 133kB 6.7MB/s ta 0:00:01\n", |
39 | | - "\u001b[?25hRequirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from requests->smart-open>=1.8.1->gensim) (1.25.9)\n", |
40 | | - "Requirement already satisfied: idna<3,>=2.5 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from requests->smart-open>=1.8.1->gensim) (2.9)\n", |
41 | | - "Requirement already satisfied: chardet<4,>=3.0.2 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from requests->smart-open>=1.8.1->gensim) (3.0.4)\n", |
42 | | - "Requirement already satisfied: certifi>=2017.4.17 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from requests->smart-open>=1.8.1->gensim) (2018.8.24)\n", |
43 | | - "Collecting jmespath<1.0.0,>=0.7.1 (from boto3->smart-open>=1.8.1->gensim)\n", |
44 | | - " Using cached https://files.pythonhosted.org/packages/a3/43/1e939e1fcd87b827fe192d0c9fc25b48c5b3368902bfb913de7754b0dc03/jmespath-0.9.5-py2.py3-none-any.whl\n", |
45 | | - "Collecting s3transfer<0.4.0,>=0.3.0 (from boto3->smart-open>=1.8.1->gensim)\n", |
46 | | - " Using cached https://files.pythonhosted.org/packages/69/79/e6afb3d8b0b4e96cefbdc690f741d7dd24547ff1f94240c997a26fa908d3/s3transfer-0.3.3-py2.py3-none-any.whl\n", |
47 | | - "Collecting botocore<1.17.0,>=1.16.4 (from boto3->smart-open>=1.8.1->gensim)\n" |
| 19 | + "Requirement already satisfied: gensim in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (4.0.1)\n", |
| 20 | + "Requirement already satisfied: numpy>=1.11.3 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (1.20.2)\n", |
| 21 | + "Requirement already satisfied: Cython==0.29.21 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (0.29.21)\n", |
| 22 | + "Requirement already satisfied: smart-open>=1.8.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (3.0.0)\n", |
| 23 | + "Requirement already satisfied: scipy>=0.18.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (1.6.3)\n", |
| 24 | + "Requirement already satisfied: requests in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from smart-open>=1.8.1->gensim) (2.25.1)\n", |
| 25 | + "Requirement already satisfied: chardet<5,>=3.0.2 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests->smart-open>=1.8.1->gensim) (4.0.0)\n", |
| 26 | + "Requirement already satisfied: idna<3,>=2.5 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests->smart-open>=1.8.1->gensim) (2.10)\n", |
| 27 | + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests->smart-open>=1.8.1->gensim) (1.25.9)\n", |
| 28 | + "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests->smart-open>=1.8.1->gensim) (2020.12.5)\n" |
| 29 | + ] |
| 30 | + }, |
| 31 | + { |
| 32 | + "name": "stderr", |
| 33 | + "output_type": "stream", |
| 34 | + "text": [ |
| 35 | + "WARNING: You are using pip version 20.1.1; however, version 21.1.1 is available.\n", |
| 36 | + "You should consider upgrading via the 'c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\python.exe -m pip install --upgrade pip' command.\n" |
| 37 | + ] |
| 38 | + }, |
| 39 | + { |
| 40 | + "name": "stdout", |
| 41 | + "output_type": "stream", |
| 42 | + "text": [ |
| 43 | + "Requirement already satisfied: nltk in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (3.6.2)" |
| 44 | + ] |
| 45 | + }, |
| 46 | + { |
| 47 | + "name": "stderr", |
| 48 | + "output_type": "stream", |
| 49 | + "text": [ |
| 50 | + "WARNING: You are using pip version 20.1.1; however, version 21.1.1 is available.\n", |
| 51 | + "You should consider upgrading via the 'c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\python.exe -m pip install --upgrade pip' command.\n" |
| 52 | + ] |
| 53 | + }, |
| 54 | + { |
| 55 | + "name": "stdout", |
| 56 | + "output_type": "stream", |
| 57 | + "text": [ |
| 58 | + "\n", |
| 59 | + "Requirement already satisfied: joblib in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from nltk) (1.0.1)\n", |
| 60 | + "Requirement already satisfied: tqdm in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from nltk) (4.46.1)\n", |
| 61 | + "Requirement already satisfied: regex in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from nltk) (2021.4.4)\n", |
| 62 | + "Requirement already satisfied: click in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from nltk) (7.1.2)\n" |
48 | 63 | ] |
49 | 64 | } |
50 | 65 | ], |
51 | 66 | "source": [ |
52 | 67 | "!pip install gensim\n", |
53 | | - "!pip install nltk\n", |
54 | | - "#todo: add pip for downloading nltk data?" |
| 68 | + "!pip install nltk" |
55 | 69 | ] |
56 | 70 | }, |
57 | 71 | { |
|
61 | 75 | "outputs": [], |
62 | 76 | "source": [ |
63 | 77 | "from nltk.tokenize import word_tokenize\n", |
64 | | - "from gensim.models.doc2vec import Doc2Vec, TaggedDocument\n" |
| 78 | + "from gensim.models.doc2vec import Doc2Vec, TaggedDocument" |
65 | 79 | ] |
66 | 80 | }, |
67 | 81 | { |
|
70 | 84 | "metadata": {}, |
71 | 85 | "outputs": [], |
72 | 86 | "source": [ |
73 | | - "#Read the dataset’s README to understand the data format. \n", |
| 87 | + "# Read the dataset’s README to understand the data format. \n", |
| 88 | + "\n", |
74 | 89 | "data_path = \"booksummaries.txt\"\n", |
75 | 90 | "mydata = {} #titles-summaries dictionary object\n", |
76 | 91 | "for line in open(data_path, encoding=\"utf-8\"):\n", |
77 | 92 | " temp = line.split(\"\\t\")\n", |
78 | | - " mydata[temp[2]] = temp[6]\n", |
79 | | - "\n" |
| 93 | + " mydata[temp[2]] = temp[6]" |
80 | 94 | ] |
81 | 95 | }, |
82 | 96 | { |
|
90 | 104 | "model = Doc2Vec(vector_size=50, alpha=0.025, min_count=10, dm =1, epochs=100)\n", |
91 | 105 | "model.build_vocab(train_doc2vec)\n", |
92 | 106 | "model.train(train_doc2vec, total_examples=model.corpus_count, epochs=model.epochs)\n", |
93 | | - "model.save(\"d2v.model\")\n", |
94 | | - "\n" |
| 107 | + "model.save(\"d2v.model\")" |
95 | 108 | ] |
96 | 109 | }, |
97 | 110 | { |
|
117 | 130 | "Napoleon enacts changes to the governance structure of the farm, replacing meetings with a committee of pigs who will run the farm.\n", |
118 | 131 | " \"\"\"\n", |
119 | 132 | "new_vector = model.infer_vector(word_tokenize(sample))\n", |
120 | | - "sims = model.docvecs.most_similar([new_vector]) #gives 10 most similar titles\n", |
| 133 | + "sims = model.docvecs.most_similar([new_vector])\n", |
121 | 134 | "print(sims)" |
122 | 135 | ] |
123 | | - }, |
124 | | - { |
125 | | - "cell_type": "code", |
126 | | - "execution_count": null, |
127 | | - "metadata": {}, |
128 | | - "outputs": [], |
129 | | - "source": [] |
130 | 136 | } |
131 | 137 | ], |
132 | 138 | "metadata": { |
|
145 | 151 | "name": "python", |
146 | 152 | "nbconvert_exporter": "python", |
147 | 153 | "pygments_lexer": "ipython3", |
148 | | - "version": "3.5.6" |
| 154 | + "version": "3.7.9" |
149 | 155 | } |
150 | 156 | }, |
151 | 157 | "nbformat": 4, |
152 | | - "nbformat_minor": 2 |
| 158 | + "nbformat_minor": 4 |
153 | 159 | } |
0 commit comments