Skip to content

Commit bff6a21

Browse files
committed
NLP: Update Notebook 04 with windows compatibility
1 parent ea09bda commit bff6a21

File tree

1 file changed

+54
-48
lines changed

1 file changed

+54
-48
lines changed

Ch7/04_RecommenderSystems.ipynb

Lines changed: 54 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -9,49 +9,63 @@
99
},
1010
{
1111
"cell_type": "code",
12-
"execution_count": null,
12+
"execution_count": 1,
1313
"metadata": {},
1414
"outputs": [
1515
{
1616
"name": "stdout",
1717
"output_type": "stream",
1818
"text": [
19-
"Collecting gensim\n",
20-
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/1d/69/1262ed0050c21f5054702b8e96a2d8c310d4cd059e4a08c9a2fe6a5dae65/gensim-3.8.3-cp35-cp35m-manylinux1_x86_64.whl (24.2MB)\n",
21-
"\u001b[K 100% |████████████████████████████████| 24.2MB 930kB/s ta 0:00:011 41% |█████████████▎ | 10.1MB 5.2MB/s eta 0:00:03\n",
22-
"\u001b[?25hCollecting smart-open>=1.8.1 (from gensim)\n",
23-
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/74/77/744c79da6e66691e3500b6dffff29bdd787015eae817d594791edc7b719b/smart_open-2.0.0.tar.gz (103kB)\n",
24-
"\u001b[K 100% |████████████████████████████████| 112kB 3.4MB/s ta 0:00:01\n",
25-
"\u001b[?25hRequirement already satisfied: six>=1.5.0 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from gensim) (1.14.0)\n",
26-
"Collecting scipy>=0.18.1 (from gensim)\n",
27-
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/c1/60/8cbf00c0deb50a971e6e3a015fb32513960a92867df979870a454481817c/scipy-1.4.1-cp35-cp35m-manylinux1_x86_64.whl (26.0MB)\n",
28-
"\u001b[K 100% |████████████████████████████████| 26.0MB 1.0MB/s ta 0:00:011\n",
29-
"\u001b[?25hCollecting numpy>=1.11.3 (from gensim)\n",
30-
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/38/92/fa5295d9755c7876cb8490eab866e1780154033fa45978d9cf74ffbd4c68/numpy-1.18.4-cp35-cp35m-manylinux1_x86_64.whl (20.0MB)\n",
31-
"\u001b[K 100% |████████████████████████████████| 20.0MB 1.8MB/s eta 0:00:01\n",
32-
"\u001b[?25hRequirement already satisfied: requests in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from smart-open>=1.8.1->gensim) (2.23.0)\n",
33-
"Collecting boto (from smart-open>=1.8.1->gensim)\n",
34-
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/23/10/c0b78c27298029e4454a472a1919bde20cb182dab1662cec7f2ca1dcc523/boto-2.49.0-py2.py3-none-any.whl (1.4MB)\n",
35-
"\u001b[K 100% |████████████████████████████████| 1.4MB 4.6MB/s eta 0:00:01\n",
36-
"\u001b[?25hCollecting boto3 (from smart-open>=1.8.1->gensim)\n",
37-
"\u001b[?25l Downloading https://files.pythonhosted.org/packages/36/9e/e963605983fc1188c200ce84e2e07a1882c84a9e4c71cba80076b21441bb/boto3-1.13.4-py2.py3-none-any.whl (128kB)\n",
38-
"\u001b[K 100% |████████████████████████████████| 133kB 6.7MB/s ta 0:00:01\n",
39-
"\u001b[?25hRequirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from requests->smart-open>=1.8.1->gensim) (1.25.9)\n",
40-
"Requirement already satisfied: idna<3,>=2.5 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from requests->smart-open>=1.8.1->gensim) (2.9)\n",
41-
"Requirement already satisfied: chardet<4,>=3.0.2 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from requests->smart-open>=1.8.1->gensim) (3.0.4)\n",
42-
"Requirement already satisfied: certifi>=2017.4.17 in /home/etherealenvy/miniconda3/envs/practicalnlp/lib/python3.5/site-packages (from requests->smart-open>=1.8.1->gensim) (2018.8.24)\n",
43-
"Collecting jmespath<1.0.0,>=0.7.1 (from boto3->smart-open>=1.8.1->gensim)\n",
44-
" Using cached https://files.pythonhosted.org/packages/a3/43/1e939e1fcd87b827fe192d0c9fc25b48c5b3368902bfb913de7754b0dc03/jmespath-0.9.5-py2.py3-none-any.whl\n",
45-
"Collecting s3transfer<0.4.0,>=0.3.0 (from boto3->smart-open>=1.8.1->gensim)\n",
46-
" Using cached https://files.pythonhosted.org/packages/69/79/e6afb3d8b0b4e96cefbdc690f741d7dd24547ff1f94240c997a26fa908d3/s3transfer-0.3.3-py2.py3-none-any.whl\n",
47-
"Collecting botocore<1.17.0,>=1.16.4 (from boto3->smart-open>=1.8.1->gensim)\n"
19+
"Requirement already satisfied: gensim in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (4.0.1)\n",
20+
"Requirement already satisfied: numpy>=1.11.3 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (1.20.2)\n",
21+
"Requirement already satisfied: Cython==0.29.21 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (0.29.21)\n",
22+
"Requirement already satisfied: smart-open>=1.8.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (3.0.0)\n",
23+
"Requirement already satisfied: scipy>=0.18.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from gensim) (1.6.3)\n",
24+
"Requirement already satisfied: requests in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from smart-open>=1.8.1->gensim) (2.25.1)\n",
25+
"Requirement already satisfied: chardet<5,>=3.0.2 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests->smart-open>=1.8.1->gensim) (4.0.0)\n",
26+
"Requirement already satisfied: idna<3,>=2.5 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests->smart-open>=1.8.1->gensim) (2.10)\n",
27+
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests->smart-open>=1.8.1->gensim) (1.25.9)\n",
28+
"Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from requests->smart-open>=1.8.1->gensim) (2020.12.5)\n"
29+
]
30+
},
31+
{
32+
"name": "stderr",
33+
"output_type": "stream",
34+
"text": [
35+
"WARNING: You are using pip version 20.1.1; however, version 21.1.1 is available.\n",
36+
"You should consider upgrading via the 'c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\python.exe -m pip install --upgrade pip' command.\n"
37+
]
38+
},
39+
{
40+
"name": "stdout",
41+
"output_type": "stream",
42+
"text": [
43+
"Requirement already satisfied: nltk in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (3.6.2)"
44+
]
45+
},
46+
{
47+
"name": "stderr",
48+
"output_type": "stream",
49+
"text": [
50+
"WARNING: You are using pip version 20.1.1; however, version 21.1.1 is available.\n",
51+
"You should consider upgrading via the 'c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\python.exe -m pip install --upgrade pip' command.\n"
52+
]
53+
},
54+
{
55+
"name": "stdout",
56+
"output_type": "stream",
57+
"text": [
58+
"\n",
59+
"Requirement already satisfied: joblib in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from nltk) (1.0.1)\n",
60+
"Requirement already satisfied: tqdm in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from nltk) (4.46.1)\n",
61+
"Requirement already satisfied: regex in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from nltk) (2021.4.4)\n",
62+
"Requirement already satisfied: click in c:\\users\\sukee\\appdata\\local\\programs\\python\\python37\\lib\\site-packages (from nltk) (7.1.2)\n"
4863
]
4964
}
5065
],
5166
"source": [
5267
"!pip install gensim\n",
53-
"!pip install nltk\n",
54-
"#todo: add pip for downloading nltk data?"
68+
"!pip install nltk"
5569
]
5670
},
5771
{
@@ -61,7 +75,7 @@
6175
"outputs": [],
6276
"source": [
6377
"from nltk.tokenize import word_tokenize\n",
64-
"from gensim.models.doc2vec import Doc2Vec, TaggedDocument\n"
78+
"from gensim.models.doc2vec import Doc2Vec, TaggedDocument"
6579
]
6680
},
6781
{
@@ -70,13 +84,13 @@
7084
"metadata": {},
7185
"outputs": [],
7286
"source": [
73-
"#Read the dataset’s README to understand the data format. \n",
87+
"# Read the dataset’s README to understand the data format. \n",
88+
"\n",
7489
"data_path = \"booksummaries.txt\"\n",
7590
"mydata = {} #titles-summaries dictionary object\n",
7691
"for line in open(data_path, encoding=\"utf-8\"):\n",
7792
" temp = line.split(\"\\t\")\n",
78-
" mydata[temp[2]] = temp[6]\n",
79-
"\n"
93+
" mydata[temp[2]] = temp[6]"
8094
]
8195
},
8296
{
@@ -90,8 +104,7 @@
90104
"model = Doc2Vec(vector_size=50, alpha=0.025, min_count=10, dm =1, epochs=100)\n",
91105
"model.build_vocab(train_doc2vec)\n",
92106
"model.train(train_doc2vec, total_examples=model.corpus_count, epochs=model.epochs)\n",
93-
"model.save(\"d2v.model\")\n",
94-
"\n"
107+
"model.save(\"d2v.model\")"
95108
]
96109
},
97110
{
@@ -117,16 +130,9 @@
117130
"Napoleon enacts changes to the governance structure of the farm, replacing meetings with a committee of pigs who will run the farm.\n",
118131
" \"\"\"\n",
119132
"new_vector = model.infer_vector(word_tokenize(sample))\n",
120-
"sims = model.docvecs.most_similar([new_vector]) #gives 10 most similar titles\n",
133+
"sims = model.docvecs.most_similar([new_vector])\n",
121134
"print(sims)"
122135
]
123-
},
124-
{
125-
"cell_type": "code",
126-
"execution_count": null,
127-
"metadata": {},
128-
"outputs": [],
129-
"source": []
130136
}
131137
],
132138
"metadata": {
@@ -145,9 +151,9 @@
145151
"name": "python",
146152
"nbconvert_exporter": "python",
147153
"pygments_lexer": "ipython3",
148-
"version": "3.5.6"
154+
"version": "3.7.9"
149155
}
150156
},
151157
"nbformat": 4,
152-
"nbformat_minor": 2
158+
"nbformat_minor": 4
153159
}

0 commit comments

Comments
 (0)