Amazing-Python-Scripts

Форк
0
/
movie-recommender-system.ipynb 
2377 строк · 89.5 Кб
1
{
2
 "cells": [
3
  {
4
   "cell_type": "code",
5
   "execution_count": 1,
6
   "id": "1857ea93",
7
   "metadata": {},
8
   "outputs": [],
9
   "source": [
10
    "import numpy as np\n",
11
    "import pandas as pd"
12
   ]
13
  },
14
  {
15
   "cell_type": "code",
16
   "execution_count": 2,
17
   "id": "c936f055",
18
   "metadata": {},
19
   "outputs": [],
20
   "source": [
21
    "movies = pd.read_csv('tmdb_5000_movies.csv')\n",
22
    "credits = pd.read_csv('tmdb_5000_credits.csv') "
23
   ]
24
  },
25
  {
26
   "cell_type": "code",
27
   "execution_count": 3,
28
   "id": "62a46c65",
29
   "metadata": {},
30
   "outputs": [
31
    {
32
     "data": {
33
      "text/html": [
34
       "<div>\n",
35
       "<style scoped>\n",
36
       "    .dataframe tbody tr th:only-of-type {\n",
37
       "        vertical-align: middle;\n",
38
       "    }\n",
39
       "\n",
40
       "    .dataframe tbody tr th {\n",
41
       "        vertical-align: top;\n",
42
       "    }\n",
43
       "\n",
44
       "    .dataframe thead th {\n",
45
       "        text-align: right;\n",
46
       "    }\n",
47
       "</style>\n",
48
       "<table border=\"1\" class=\"dataframe\">\n",
49
       "  <thead>\n",
50
       "    <tr style=\"text-align: right;\">\n",
51
       "      <th></th>\n",
52
       "      <th>budget</th>\n",
53
       "      <th>genres</th>\n",
54
       "      <th>homepage</th>\n",
55
       "      <th>id</th>\n",
56
       "      <th>keywords</th>\n",
57
       "      <th>original_language</th>\n",
58
       "      <th>original_title</th>\n",
59
       "      <th>overview</th>\n",
60
       "      <th>popularity</th>\n",
61
       "      <th>production_companies</th>\n",
62
       "      <th>production_countries</th>\n",
63
       "      <th>release_date</th>\n",
64
       "      <th>revenue</th>\n",
65
       "      <th>runtime</th>\n",
66
       "      <th>spoken_languages</th>\n",
67
       "      <th>status</th>\n",
68
       "      <th>tagline</th>\n",
69
       "      <th>title</th>\n",
70
       "      <th>vote_average</th>\n",
71
       "      <th>vote_count</th>\n",
72
       "    </tr>\n",
73
       "  </thead>\n",
74
       "  <tbody>\n",
75
       "    <tr>\n",
76
       "      <th>0</th>\n",
77
       "      <td>237000000</td>\n",
78
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
79
       "      <td>http://www.avatarmovie.com/</td>\n",
80
       "      <td>19995</td>\n",
81
       "      <td>[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...</td>\n",
82
       "      <td>en</td>\n",
83
       "      <td>Avatar</td>\n",
84
       "      <td>In the 22nd century, a paraplegic Marine is di...</td>\n",
85
       "      <td>150.437577</td>\n",
86
       "      <td>[{\"name\": \"Ingenious Film Partners\", \"id\": 289...</td>\n",
87
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
88
       "      <td>2009-12-10</td>\n",
89
       "      <td>2787965087</td>\n",
90
       "      <td>162.0</td>\n",
91
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso...</td>\n",
92
       "      <td>Released</td>\n",
93
       "      <td>Enter the World of Pandora.</td>\n",
94
       "      <td>Avatar</td>\n",
95
       "      <td>7.2</td>\n",
96
       "      <td>11800</td>\n",
97
       "    </tr>\n",
98
       "    <tr>\n",
99
       "      <th>1</th>\n",
100
       "      <td>300000000</td>\n",
101
       "      <td>[{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"...</td>\n",
102
       "      <td>http://disney.go.com/disneypictures/pirates/</td>\n",
103
       "      <td>285</td>\n",
104
       "      <td>[{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...</td>\n",
105
       "      <td>en</td>\n",
106
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
107
       "      <td>Captain Barbossa, long believed to be dead, ha...</td>\n",
108
       "      <td>139.082615</td>\n",
109
       "      <td>[{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"...</td>\n",
110
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
111
       "      <td>2007-05-19</td>\n",
112
       "      <td>961000000</td>\n",
113
       "      <td>169.0</td>\n",
114
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
115
       "      <td>Released</td>\n",
116
       "      <td>At the end of the world, the adventure begins.</td>\n",
117
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
118
       "      <td>6.9</td>\n",
119
       "      <td>4500</td>\n",
120
       "    </tr>\n",
121
       "    <tr>\n",
122
       "      <th>2</th>\n",
123
       "      <td>245000000</td>\n",
124
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
125
       "      <td>http://www.sonypictures.com/movies/spectre/</td>\n",
126
       "      <td>206647</td>\n",
127
       "      <td>[{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...</td>\n",
128
       "      <td>en</td>\n",
129
       "      <td>Spectre</td>\n",
130
       "      <td>A cryptic message from Bond’s past sends him o...</td>\n",
131
       "      <td>107.376788</td>\n",
132
       "      <td>[{\"name\": \"Columbia Pictures\", \"id\": 5}, {\"nam...</td>\n",
133
       "      <td>[{\"iso_3166_1\": \"GB\", \"name\": \"United Kingdom\"...</td>\n",
134
       "      <td>2015-10-26</td>\n",
135
       "      <td>880674609</td>\n",
136
       "      <td>148.0</td>\n",
137
       "      <td>[{\"iso_639_1\": \"fr\", \"name\": \"Fran\\u00e7ais\"},...</td>\n",
138
       "      <td>Released</td>\n",
139
       "      <td>A Plan No One Escapes</td>\n",
140
       "      <td>Spectre</td>\n",
141
       "      <td>6.3</td>\n",
142
       "      <td>4466</td>\n",
143
       "    </tr>\n",
144
       "    <tr>\n",
145
       "      <th>3</th>\n",
146
       "      <td>250000000</td>\n",
147
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam...</td>\n",
148
       "      <td>http://www.thedarkknightrises.com/</td>\n",
149
       "      <td>49026</td>\n",
150
       "      <td>[{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...</td>\n",
151
       "      <td>en</td>\n",
152
       "      <td>The Dark Knight Rises</td>\n",
153
       "      <td>Following the death of District Attorney Harve...</td>\n",
154
       "      <td>112.312950</td>\n",
155
       "      <td>[{\"name\": \"Legendary Pictures\", \"id\": 923}, {\"...</td>\n",
156
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
157
       "      <td>2012-07-16</td>\n",
158
       "      <td>1084939099</td>\n",
159
       "      <td>165.0</td>\n",
160
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
161
       "      <td>Released</td>\n",
162
       "      <td>The Legend Ends</td>\n",
163
       "      <td>The Dark Knight Rises</td>\n",
164
       "      <td>7.6</td>\n",
165
       "      <td>9106</td>\n",
166
       "    </tr>\n",
167
       "    <tr>\n",
168
       "      <th>4</th>\n",
169
       "      <td>260000000</td>\n",
170
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
171
       "      <td>http://movies.disney.com/john-carter</td>\n",
172
       "      <td>49529</td>\n",
173
       "      <td>[{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...</td>\n",
174
       "      <td>en</td>\n",
175
       "      <td>John Carter</td>\n",
176
       "      <td>John Carter is a war-weary, former military ca...</td>\n",
177
       "      <td>43.926995</td>\n",
178
       "      <td>[{\"name\": \"Walt Disney Pictures\", \"id\": 2}]</td>\n",
179
       "      <td>[{\"iso_3166_1\": \"US\", \"name\": \"United States o...</td>\n",
180
       "      <td>2012-03-07</td>\n",
181
       "      <td>284139100</td>\n",
182
       "      <td>132.0</td>\n",
183
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
184
       "      <td>Released</td>\n",
185
       "      <td>Lost in our world, found in another.</td>\n",
186
       "      <td>John Carter</td>\n",
187
       "      <td>6.1</td>\n",
188
       "      <td>2124</td>\n",
189
       "    </tr>\n",
190
       "  </tbody>\n",
191
       "</table>\n",
192
       "</div>"
193
      ],
194
      "text/plain": [
195
       "      budget                                             genres  \\\n",
196
       "0  237000000  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...   \n",
197
       "1  300000000  [{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"...   \n",
198
       "2  245000000  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...   \n",
199
       "3  250000000  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam...   \n",
200
       "4  260000000  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...   \n",
201
       "\n",
202
       "                                       homepage      id  \\\n",
203
       "0                   http://www.avatarmovie.com/   19995   \n",
204
       "1  http://disney.go.com/disneypictures/pirates/     285   \n",
205
       "2   http://www.sonypictures.com/movies/spectre/  206647   \n",
206
       "3            http://www.thedarkknightrises.com/   49026   \n",
207
       "4          http://movies.disney.com/john-carter   49529   \n",
208
       "\n",
209
       "                                            keywords original_language  \\\n",
210
       "0  [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...                en   \n",
211
       "1  [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...                en   \n",
212
       "2  [{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...                en   \n",
213
       "3  [{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...                en   \n",
214
       "4  [{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...                en   \n",
215
       "\n",
216
       "                             original_title  \\\n",
217
       "0                                    Avatar   \n",
218
       "1  Pirates of the Caribbean: At World's End   \n",
219
       "2                                   Spectre   \n",
220
       "3                     The Dark Knight Rises   \n",
221
       "4                               John Carter   \n",
222
       "\n",
223
       "                                            overview  popularity  \\\n",
224
       "0  In the 22nd century, a paraplegic Marine is di...  150.437577   \n",
225
       "1  Captain Barbossa, long believed to be dead, ha...  139.082615   \n",
226
       "2  A cryptic message from Bond’s past sends him o...  107.376788   \n",
227
       "3  Following the death of District Attorney Harve...  112.312950   \n",
228
       "4  John Carter is a war-weary, former military ca...   43.926995   \n",
229
       "\n",
230
       "                                production_companies  \\\n",
231
       "0  [{\"name\": \"Ingenious Film Partners\", \"id\": 289...   \n",
232
       "1  [{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"...   \n",
233
       "2  [{\"name\": \"Columbia Pictures\", \"id\": 5}, {\"nam...   \n",
234
       "3  [{\"name\": \"Legendary Pictures\", \"id\": 923}, {\"...   \n",
235
       "4        [{\"name\": \"Walt Disney Pictures\", \"id\": 2}]   \n",
236
       "\n",
237
       "                                production_countries release_date     revenue  \\\n",
238
       "0  [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2009-12-10  2787965087   \n",
239
       "1  [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2007-05-19   961000000   \n",
240
       "2  [{\"iso_3166_1\": \"GB\", \"name\": \"United Kingdom\"...   2015-10-26   880674609   \n",
241
       "3  [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2012-07-16  1084939099   \n",
242
       "4  [{\"iso_3166_1\": \"US\", \"name\": \"United States o...   2012-03-07   284139100   \n",
243
       "\n",
244
       "   runtime                                   spoken_languages    status  \\\n",
245
       "0    162.0  [{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso...  Released   \n",
246
       "1    169.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]  Released   \n",
247
       "2    148.0  [{\"iso_639_1\": \"fr\", \"name\": \"Fran\\u00e7ais\"},...  Released   \n",
248
       "3    165.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]  Released   \n",
249
       "4    132.0           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]  Released   \n",
250
       "\n",
251
       "                                          tagline  \\\n",
252
       "0                     Enter the World of Pandora.   \n",
253
       "1  At the end of the world, the adventure begins.   \n",
254
       "2                           A Plan No One Escapes   \n",
255
       "3                                 The Legend Ends   \n",
256
       "4            Lost in our world, found in another.   \n",
257
       "\n",
258
       "                                      title  vote_average  vote_count  \n",
259
       "0                                    Avatar           7.2       11800  \n",
260
       "1  Pirates of the Caribbean: At World's End           6.9        4500  \n",
261
       "2                                   Spectre           6.3        4466  \n",
262
       "3                     The Dark Knight Rises           7.6        9106  \n",
263
       "4                               John Carter           6.1        2124  "
264
      ]
265
     },
266
     "execution_count": 3,
267
     "metadata": {},
268
     "output_type": "execute_result"
269
    }
270
   ],
271
   "source": [
272
    "movies.head()"
273
   ]
274
  },
275
  {
276
   "cell_type": "code",
277
   "execution_count": 4,
278
   "id": "3fe5f790",
279
   "metadata": {},
280
   "outputs": [
281
    {
282
     "data": {
283
      "text/plain": [
284
       "(4803, 20)"
285
      ]
286
     },
287
     "execution_count": 4,
288
     "metadata": {},
289
     "output_type": "execute_result"
290
    }
291
   ],
292
   "source": [
293
    "movies.shape"
294
   ]
295
  },
296
  {
297
   "cell_type": "code",
298
   "execution_count": 5,
299
   "id": "3410a6c8",
300
   "metadata": {},
301
   "outputs": [
302
    {
303
     "data": {
304
      "text/html": [
305
       "<div>\n",
306
       "<style scoped>\n",
307
       "    .dataframe tbody tr th:only-of-type {\n",
308
       "        vertical-align: middle;\n",
309
       "    }\n",
310
       "\n",
311
       "    .dataframe tbody tr th {\n",
312
       "        vertical-align: top;\n",
313
       "    }\n",
314
       "\n",
315
       "    .dataframe thead th {\n",
316
       "        text-align: right;\n",
317
       "    }\n",
318
       "</style>\n",
319
       "<table border=\"1\" class=\"dataframe\">\n",
320
       "  <thead>\n",
321
       "    <tr style=\"text-align: right;\">\n",
322
       "      <th></th>\n",
323
       "      <th>movie_id</th>\n",
324
       "      <th>title</th>\n",
325
       "      <th>cast</th>\n",
326
       "      <th>crew</th>\n",
327
       "    </tr>\n",
328
       "  </thead>\n",
329
       "  <tbody>\n",
330
       "    <tr>\n",
331
       "      <th>0</th>\n",
332
       "      <td>19995</td>\n",
333
       "      <td>Avatar</td>\n",
334
       "      <td>[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...</td>\n",
335
       "      <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n",
336
       "    </tr>\n",
337
       "    <tr>\n",
338
       "      <th>1</th>\n",
339
       "      <td>285</td>\n",
340
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
341
       "      <td>[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...</td>\n",
342
       "      <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n",
343
       "    </tr>\n",
344
       "    <tr>\n",
345
       "      <th>2</th>\n",
346
       "      <td>206647</td>\n",
347
       "      <td>Spectre</td>\n",
348
       "      <td>[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...</td>\n",
349
       "      <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n",
350
       "    </tr>\n",
351
       "    <tr>\n",
352
       "      <th>3</th>\n",
353
       "      <td>49026</td>\n",
354
       "      <td>The Dark Knight Rises</td>\n",
355
       "      <td>[{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...</td>\n",
356
       "      <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n",
357
       "    </tr>\n",
358
       "    <tr>\n",
359
       "      <th>4</th>\n",
360
       "      <td>49529</td>\n",
361
       "      <td>John Carter</td>\n",
362
       "      <td>[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...</td>\n",
363
       "      <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n",
364
       "    </tr>\n",
365
       "  </tbody>\n",
366
       "</table>\n",
367
       "</div>"
368
      ],
369
      "text/plain": [
370
       "   movie_id                                     title  \\\n",
371
       "0     19995                                    Avatar   \n",
372
       "1       285  Pirates of the Caribbean: At World's End   \n",
373
       "2    206647                                   Spectre   \n",
374
       "3     49026                     The Dark Knight Rises   \n",
375
       "4     49529                               John Carter   \n",
376
       "\n",
377
       "                                                cast  \\\n",
378
       "0  [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...   \n",
379
       "1  [{\"cast_id\": 4, \"character\": \"Captain Jack Spa...   \n",
380
       "2  [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...   \n",
381
       "3  [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...   \n",
382
       "4  [{\"cast_id\": 5, \"character\": \"John Carter\", \"c...   \n",
383
       "\n",
384
       "                                                crew  \n",
385
       "0  [{\"credit_id\": \"52fe48009251416c750aca23\", \"de...  \n",
386
       "1  [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...  \n",
387
       "2  [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...  \n",
388
       "3  [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...  \n",
389
       "4  [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...  "
390
      ]
391
     },
392
     "execution_count": 5,
393
     "metadata": {},
394
     "output_type": "execute_result"
395
    }
396
   ],
397
   "source": [
398
    "credits.head()"
399
   ]
400
  },
401
  {
402
   "cell_type": "code",
403
   "execution_count": 6,
404
   "id": "e91d93e4",
405
   "metadata": {},
406
   "outputs": [],
407
   "source": [
408
    "# Join on the TMDB id instead of the title: titles are not guaranteed to be unique,\n",
    "# and a title join pairs every same-titled movie with every same-titled credits row.\n",
    "# credits' own `title` is dropped first so the result keeps a single `title` column\n",
    "# and the same column layout (movies columns + movie_id, cast, crew).\n",
    "movies = movies.merge(credits.drop(columns='title'), left_on='id', right_on='movie_id')"
409
   ]
410
  },
411
  {
412
   "cell_type": "code",
413
   "execution_count": 7,
414
   "id": "b491943c",
415
   "metadata": {},
416
   "outputs": [
417
    {
418
     "data": {
419
      "text/html": [
420
       "<div>\n",
421
       "<style scoped>\n",
422
       "    .dataframe tbody tr th:only-of-type {\n",
423
       "        vertical-align: middle;\n",
424
       "    }\n",
425
       "\n",
426
       "    .dataframe tbody tr th {\n",
427
       "        vertical-align: top;\n",
428
       "    }\n",
429
       "\n",
430
       "    .dataframe thead th {\n",
431
       "        text-align: right;\n",
432
       "    }\n",
433
       "</style>\n",
434
       "<table border=\"1\" class=\"dataframe\">\n",
435
       "  <thead>\n",
436
       "    <tr style=\"text-align: right;\">\n",
437
       "      <th></th>\n",
438
       "      <th>budget</th>\n",
439
       "      <th>genres</th>\n",
440
       "      <th>homepage</th>\n",
441
       "      <th>id</th>\n",
442
       "      <th>keywords</th>\n",
443
       "      <th>original_language</th>\n",
444
       "      <th>original_title</th>\n",
445
       "      <th>overview</th>\n",
446
       "      <th>popularity</th>\n",
447
       "      <th>production_companies</th>\n",
448
       "      <th>...</th>\n",
449
       "      <th>runtime</th>\n",
450
       "      <th>spoken_languages</th>\n",
451
       "      <th>status</th>\n",
452
       "      <th>tagline</th>\n",
453
       "      <th>title</th>\n",
454
       "      <th>vote_average</th>\n",
455
       "      <th>vote_count</th>\n",
456
       "      <th>movie_id</th>\n",
457
       "      <th>cast</th>\n",
458
       "      <th>crew</th>\n",
459
       "    </tr>\n",
460
       "  </thead>\n",
461
       "  <tbody>\n",
462
       "    <tr>\n",
463
       "      <th>0</th>\n",
464
       "      <td>237000000</td>\n",
465
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
466
       "      <td>http://www.avatarmovie.com/</td>\n",
467
       "      <td>19995</td>\n",
468
       "      <td>[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...</td>\n",
469
       "      <td>en</td>\n",
470
       "      <td>Avatar</td>\n",
471
       "      <td>In the 22nd century, a paraplegic Marine is di...</td>\n",
472
       "      <td>150.437577</td>\n",
473
       "      <td>[{\"name\": \"Ingenious Film Partners\", \"id\": 289...</td>\n",
474
       "      <td>...</td>\n",
475
       "      <td>162.0</td>\n",
476
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso...</td>\n",
477
       "      <td>Released</td>\n",
478
       "      <td>Enter the World of Pandora.</td>\n",
479
       "      <td>Avatar</td>\n",
480
       "      <td>7.2</td>\n",
481
       "      <td>11800</td>\n",
482
       "      <td>19995</td>\n",
483
       "      <td>[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...</td>\n",
484
       "      <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n",
485
       "    </tr>\n",
486
       "    <tr>\n",
487
       "      <th>1</th>\n",
488
       "      <td>300000000</td>\n",
489
       "      <td>[{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"...</td>\n",
490
       "      <td>http://disney.go.com/disneypictures/pirates/</td>\n",
491
       "      <td>285</td>\n",
492
       "      <td>[{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...</td>\n",
493
       "      <td>en</td>\n",
494
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
495
       "      <td>Captain Barbossa, long believed to be dead, ha...</td>\n",
496
       "      <td>139.082615</td>\n",
497
       "      <td>[{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"...</td>\n",
498
       "      <td>...</td>\n",
499
       "      <td>169.0</td>\n",
500
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
501
       "      <td>Released</td>\n",
502
       "      <td>At the end of the world, the adventure begins.</td>\n",
503
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
504
       "      <td>6.9</td>\n",
505
       "      <td>4500</td>\n",
506
       "      <td>285</td>\n",
507
       "      <td>[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...</td>\n",
508
       "      <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n",
509
       "    </tr>\n",
510
       "    <tr>\n",
511
       "      <th>2</th>\n",
512
       "      <td>245000000</td>\n",
513
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
514
       "      <td>http://www.sonypictures.com/movies/spectre/</td>\n",
515
       "      <td>206647</td>\n",
516
       "      <td>[{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...</td>\n",
517
       "      <td>en</td>\n",
518
       "      <td>Spectre</td>\n",
519
       "      <td>A cryptic message from Bond’s past sends him o...</td>\n",
520
       "      <td>107.376788</td>\n",
521
       "      <td>[{\"name\": \"Columbia Pictures\", \"id\": 5}, {\"nam...</td>\n",
522
       "      <td>...</td>\n",
523
       "      <td>148.0</td>\n",
524
       "      <td>[{\"iso_639_1\": \"fr\", \"name\": \"Fran\\u00e7ais\"},...</td>\n",
525
       "      <td>Released</td>\n",
526
       "      <td>A Plan No One Escapes</td>\n",
527
       "      <td>Spectre</td>\n",
528
       "      <td>6.3</td>\n",
529
       "      <td>4466</td>\n",
530
       "      <td>206647</td>\n",
531
       "      <td>[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...</td>\n",
532
       "      <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n",
533
       "    </tr>\n",
534
       "    <tr>\n",
535
       "      <th>3</th>\n",
536
       "      <td>250000000</td>\n",
537
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam...</td>\n",
538
       "      <td>http://www.thedarkknightrises.com/</td>\n",
539
       "      <td>49026</td>\n",
540
       "      <td>[{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...</td>\n",
541
       "      <td>en</td>\n",
542
       "      <td>The Dark Knight Rises</td>\n",
543
       "      <td>Following the death of District Attorney Harve...</td>\n",
544
       "      <td>112.312950</td>\n",
545
       "      <td>[{\"name\": \"Legendary Pictures\", \"id\": 923}, {\"...</td>\n",
546
       "      <td>...</td>\n",
547
       "      <td>165.0</td>\n",
548
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
549
       "      <td>Released</td>\n",
550
       "      <td>The Legend Ends</td>\n",
551
       "      <td>The Dark Knight Rises</td>\n",
552
       "      <td>7.6</td>\n",
553
       "      <td>9106</td>\n",
554
       "      <td>49026</td>\n",
555
       "      <td>[{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...</td>\n",
556
       "      <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n",
557
       "    </tr>\n",
558
       "    <tr>\n",
559
       "      <th>4</th>\n",
560
       "      <td>260000000</td>\n",
561
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
562
       "      <td>http://movies.disney.com/john-carter</td>\n",
563
       "      <td>49529</td>\n",
564
       "      <td>[{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...</td>\n",
565
       "      <td>en</td>\n",
566
       "      <td>John Carter</td>\n",
567
       "      <td>John Carter is a war-weary, former military ca...</td>\n",
568
       "      <td>43.926995</td>\n",
569
       "      <td>[{\"name\": \"Walt Disney Pictures\", \"id\": 2}]</td>\n",
570
       "      <td>...</td>\n",
571
       "      <td>132.0</td>\n",
572
       "      <td>[{\"iso_639_1\": \"en\", \"name\": \"English\"}]</td>\n",
573
       "      <td>Released</td>\n",
574
       "      <td>Lost in our world, found in another.</td>\n",
575
       "      <td>John Carter</td>\n",
576
       "      <td>6.1</td>\n",
577
       "      <td>2124</td>\n",
578
       "      <td>49529</td>\n",
579
       "      <td>[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...</td>\n",
580
       "      <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n",
581
       "    </tr>\n",
582
       "  </tbody>\n",
583
       "</table>\n",
584
       "<p>5 rows × 23 columns</p>\n",
585
       "</div>"
586
      ],
587
      "text/plain": [
588
       "      budget                                             genres  \\\n",
589
       "0  237000000  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...   \n",
590
       "1  300000000  [{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"...   \n",
591
       "2  245000000  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...   \n",
592
       "3  250000000  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam...   \n",
593
       "4  260000000  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...   \n",
594
       "\n",
595
       "                                       homepage      id  \\\n",
596
       "0                   http://www.avatarmovie.com/   19995   \n",
597
       "1  http://disney.go.com/disneypictures/pirates/     285   \n",
598
       "2   http://www.sonypictures.com/movies/spectre/  206647   \n",
599
       "3            http://www.thedarkknightrises.com/   49026   \n",
600
       "4          http://movies.disney.com/john-carter   49529   \n",
601
       "\n",
602
       "                                            keywords original_language  \\\n",
603
       "0  [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...                en   \n",
604
       "1  [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...                en   \n",
605
       "2  [{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...                en   \n",
606
       "3  [{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...                en   \n",
607
       "4  [{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...                en   \n",
608
       "\n",
609
       "                             original_title  \\\n",
610
       "0                                    Avatar   \n",
611
       "1  Pirates of the Caribbean: At World's End   \n",
612
       "2                                   Spectre   \n",
613
       "3                     The Dark Knight Rises   \n",
614
       "4                               John Carter   \n",
615
       "\n",
616
       "                                            overview  popularity  \\\n",
617
       "0  In the 22nd century, a paraplegic Marine is di...  150.437577   \n",
618
       "1  Captain Barbossa, long believed to be dead, ha...  139.082615   \n",
619
       "2  A cryptic message from Bond’s past sends him o...  107.376788   \n",
620
       "3  Following the death of District Attorney Harve...  112.312950   \n",
621
       "4  John Carter is a war-weary, former military ca...   43.926995   \n",
622
       "\n",
623
       "                                production_companies  ... runtime  \\\n",
624
       "0  [{\"name\": \"Ingenious Film Partners\", \"id\": 289...  ...   162.0   \n",
625
       "1  [{\"name\": \"Walt Disney Pictures\", \"id\": 2}, {\"...  ...   169.0   \n",
626
       "2  [{\"name\": \"Columbia Pictures\", \"id\": 5}, {\"nam...  ...   148.0   \n",
627
       "3  [{\"name\": \"Legendary Pictures\", \"id\": 923}, {\"...  ...   165.0   \n",
628
       "4        [{\"name\": \"Walt Disney Pictures\", \"id\": 2}]  ...   132.0   \n",
629
       "\n",
630
       "                                    spoken_languages    status  \\\n",
631
       "0  [{\"iso_639_1\": \"en\", \"name\": \"English\"}, {\"iso...  Released   \n",
632
       "1           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]  Released   \n",
633
       "2  [{\"iso_639_1\": \"fr\", \"name\": \"Fran\\u00e7ais\"},...  Released   \n",
634
       "3           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]  Released   \n",
635
       "4           [{\"iso_639_1\": \"en\", \"name\": \"English\"}]  Released   \n",
636
       "\n",
637
       "                                          tagline  \\\n",
638
       "0                     Enter the World of Pandora.   \n",
639
       "1  At the end of the world, the adventure begins.   \n",
640
       "2                           A Plan No One Escapes   \n",
641
       "3                                 The Legend Ends   \n",
642
       "4            Lost in our world, found in another.   \n",
643
       "\n",
644
       "                                      title vote_average vote_count movie_id  \\\n",
645
       "0                                    Avatar          7.2      11800    19995   \n",
646
       "1  Pirates of the Caribbean: At World's End          6.9       4500      285   \n",
647
       "2                                   Spectre          6.3       4466   206647   \n",
648
       "3                     The Dark Knight Rises          7.6       9106    49026   \n",
649
       "4                               John Carter          6.1       2124    49529   \n",
650
       "\n",
651
       "                                                cast  \\\n",
652
       "0  [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...   \n",
653
       "1  [{\"cast_id\": 4, \"character\": \"Captain Jack Spa...   \n",
654
       "2  [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...   \n",
655
       "3  [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...   \n",
656
       "4  [{\"cast_id\": 5, \"character\": \"John Carter\", \"c...   \n",
657
       "\n",
658
       "                                                crew  \n",
659
       "0  [{\"credit_id\": \"52fe48009251416c750aca23\", \"de...  \n",
660
       "1  [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...  \n",
661
       "2  [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...  \n",
662
       "3  [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...  \n",
663
       "4  [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...  \n",
664
       "\n",
665
       "[5 rows x 23 columns]"
666
      ]
667
     },
668
     "execution_count": 7,
669
     "metadata": {},
670
     "output_type": "execute_result"
671
    }
672
   ],
673
   "source": [
674
    "movies.head()\n",
675
    "# budget\n",
676
    "# homepage\n",
677
    "# id\n",
678
    "# original_language\n",
679
    "# original_title\n",
680
    "# popularity\n",
681
    "# production_companies\n",
682
    "# production_countries\n",
683
    "# release_date (not sure)"
684
   ]
685
  },
686
  {
687
   "cell_type": "code",
688
   "execution_count": 8,
689
   "id": "d517efd4",
690
   "metadata": {},
691
   "outputs": [],
692
   "source": [
693
    "movies = movies[['movie_id','title','overview','genres','keywords','cast','crew']]"
694
   ]
695
  },
696
  {
697
   "cell_type": "code",
698
   "execution_count": 9,
699
   "id": "6e0d16dc",
700
   "metadata": {},
701
   "outputs": [
702
    {
703
     "data": {
704
      "text/html": [
705
       "<div>\n",
706
       "<style scoped>\n",
707
       "    .dataframe tbody tr th:only-of-type {\n",
708
       "        vertical-align: middle;\n",
709
       "    }\n",
710
       "\n",
711
       "    .dataframe tbody tr th {\n",
712
       "        vertical-align: top;\n",
713
       "    }\n",
714
       "\n",
715
       "    .dataframe thead th {\n",
716
       "        text-align: right;\n",
717
       "    }\n",
718
       "</style>\n",
719
       "<table border=\"1\" class=\"dataframe\">\n",
720
       "  <thead>\n",
721
       "    <tr style=\"text-align: right;\">\n",
722
       "      <th></th>\n",
723
       "      <th>movie_id</th>\n",
724
       "      <th>title</th>\n",
725
       "      <th>overview</th>\n",
726
       "      <th>genres</th>\n",
727
       "      <th>keywords</th>\n",
728
       "      <th>cast</th>\n",
729
       "      <th>crew</th>\n",
730
       "    </tr>\n",
731
       "  </thead>\n",
732
       "  <tbody>\n",
733
       "    <tr>\n",
734
       "      <th>0</th>\n",
735
       "      <td>19995</td>\n",
736
       "      <td>Avatar</td>\n",
737
       "      <td>In the 22nd century, a paraplegic Marine is di...</td>\n",
738
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
739
       "      <td>[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...</td>\n",
740
       "      <td>[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...</td>\n",
741
       "      <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n",
742
       "    </tr>\n",
743
       "    <tr>\n",
744
       "      <th>1</th>\n",
745
       "      <td>285</td>\n",
746
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
747
       "      <td>Captain Barbossa, long believed to be dead, ha...</td>\n",
748
       "      <td>[{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"...</td>\n",
749
       "      <td>[{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...</td>\n",
750
       "      <td>[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...</td>\n",
751
       "      <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n",
752
       "    </tr>\n",
753
       "    <tr>\n",
754
       "      <th>2</th>\n",
755
       "      <td>206647</td>\n",
756
       "      <td>Spectre</td>\n",
757
       "      <td>A cryptic message from Bond’s past sends him o...</td>\n",
758
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
759
       "      <td>[{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...</td>\n",
760
       "      <td>[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...</td>\n",
761
       "      <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n",
762
       "    </tr>\n",
763
       "    <tr>\n",
764
       "      <th>3</th>\n",
765
       "      <td>49026</td>\n",
766
       "      <td>The Dark Knight Rises</td>\n",
767
       "      <td>Following the death of District Attorney Harve...</td>\n",
768
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam...</td>\n",
769
       "      <td>[{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...</td>\n",
770
       "      <td>[{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...</td>\n",
771
       "      <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n",
772
       "    </tr>\n",
773
       "    <tr>\n",
774
       "      <th>4</th>\n",
775
       "      <td>49529</td>\n",
776
       "      <td>John Carter</td>\n",
777
       "      <td>John Carter is a war-weary, former military ca...</td>\n",
778
       "      <td>[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...</td>\n",
779
       "      <td>[{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...</td>\n",
780
       "      <td>[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...</td>\n",
781
       "      <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n",
782
       "    </tr>\n",
783
       "  </tbody>\n",
784
       "</table>\n",
785
       "</div>"
786
      ],
787
      "text/plain": [
788
       "   movie_id                                     title  \\\n",
789
       "0     19995                                    Avatar   \n",
790
       "1       285  Pirates of the Caribbean: At World's End   \n",
791
       "2    206647                                   Spectre   \n",
792
       "3     49026                     The Dark Knight Rises   \n",
793
       "4     49529                               John Carter   \n",
794
       "\n",
795
       "                                            overview  \\\n",
796
       "0  In the 22nd century, a paraplegic Marine is di...   \n",
797
       "1  Captain Barbossa, long believed to be dead, ha...   \n",
798
       "2  A cryptic message from Bond’s past sends him o...   \n",
799
       "3  Following the death of District Attorney Harve...   \n",
800
       "4  John Carter is a war-weary, former military ca...   \n",
801
       "\n",
802
       "                                              genres  \\\n",
803
       "0  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...   \n",
804
       "1  [{\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"...   \n",
805
       "2  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...   \n",
806
       "3  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 80, \"nam...   \n",
807
       "4  [{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"nam...   \n",
808
       "\n",
809
       "                                            keywords  \\\n",
810
       "0  [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...   \n",
811
       "1  [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...   \n",
812
       "2  [{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...   \n",
813
       "3  [{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...   \n",
814
       "4  [{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...   \n",
815
       "\n",
816
       "                                                cast  \\\n",
817
       "0  [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...   \n",
818
       "1  [{\"cast_id\": 4, \"character\": \"Captain Jack Spa...   \n",
819
       "2  [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...   \n",
820
       "3  [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...   \n",
821
       "4  [{\"cast_id\": 5, \"character\": \"John Carter\", \"c...   \n",
822
       "\n",
823
       "                                                crew  \n",
824
       "0  [{\"credit_id\": \"52fe48009251416c750aca23\", \"de...  \n",
825
       "1  [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...  \n",
826
       "2  [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...  \n",
827
       "3  [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...  \n",
828
       "4  [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...  "
829
      ]
830
     },
831
     "execution_count": 9,
832
     "metadata": {},
833
     "output_type": "execute_result"
834
    }
835
   ],
836
   "source": [
837
    "movies.head()"
838
   ]
839
  },
840
  {
841
   "cell_type": "code",
842
   "execution_count": 10,
843
   "id": "acb58b32",
844
   "metadata": {},
845
   "outputs": [
846
    {
847
     "data": {
848
      "text/plain": [
849
       "movie_id    0\n",
850
       "title       0\n",
851
       "overview    3\n",
852
       "genres      0\n",
853
       "keywords    0\n",
854
       "cast        0\n",
855
       "crew        0\n",
856
       "dtype: int64"
857
      ]
858
     },
859
     "execution_count": 10,
860
     "metadata": {},
861
     "output_type": "execute_result"
862
    }
863
   ],
864
   "source": [
865
    "movies.isnull().sum()"
866
   ]
867
  },
868
  {
869
   "cell_type": "code",
870
   "execution_count": 11,
871
   "id": "122e570d",
872
   "metadata": {},
873
   "outputs": [],
874
   "source": [
875
    "movies = movies.dropna()  # rebind instead of mutating in place; drops the rows with a missing overview"
876
   ]
877
  },
878
  {
879
   "cell_type": "code",
880
   "execution_count": 12,
881
   "id": "135344a9",
882
   "metadata": {},
883
   "outputs": [],
884
   "source": [
885
    "#movies.duplicated().sum()"
886
   ]
887
  },
888
  {
889
   "cell_type": "code",
890
   "execution_count": 13,
891
   "id": "be5c56eb",
892
   "metadata": {},
893
   "outputs": [
894
    {
895
     "data": {
896
      "text/plain": [
897
       "'[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"name\": \"Fantasy\"}, {\"id\": 878, \"name\": \"Science Fiction\"}]'"
898
      ]
899
     },
900
     "execution_count": 13,
901
     "metadata": {},
902
     "output_type": "execute_result"
903
    }
904
   ],
905
   "source": [
906
    "movies.iloc[0].genres"
907
   ]
908
  },
909
  {
910
   "cell_type": "code",
911
   "execution_count": 14,
912
   "id": "d8b61c39",
913
   "metadata": {},
914
   "outputs": [],
915
   "source": [
916
    "# ast.literal_eval turns a stringified list of dicts into a real Python list\n",
917
    "import ast"
918
   ]
919
  },
920
  {
921
   "cell_type": "code",
922
   "execution_count": 15,
923
   "id": "bfcfea46",
924
   "metadata": {},
925
   "outputs": [],
926
   "source": [
927
    "def convert(text):\n",
928
    "    \"\"\"Parse a stringified list of dicts and return each dict's 'name' value, in order.\"\"\"\n",
929
    "    parsed_items = ast.literal_eval(text)\n",
930
    "    names = [item['name'] for item in parsed_items]\n",
931
    "    return names"
932
   ]
933
  },
934
  {
935
   "cell_type": "code",
936
   "execution_count": null,
937
   "id": "6d7af2b7",
938
   "metadata": {},
939
   "outputs": [],
940
   "source": []
941
  },
942
  {
943
   "cell_type": "code",
944
   "execution_count": 16,
945
   "id": "a643728f",
946
   "metadata": {},
947
   "outputs": [
948
    {
949
     "data": {
950
      "text/html": [
951
       "<div>\n",
952
       "<style scoped>\n",
953
       "    .dataframe tbody tr th:only-of-type {\n",
954
       "        vertical-align: middle;\n",
955
       "    }\n",
956
       "\n",
957
       "    .dataframe tbody tr th {\n",
958
       "        vertical-align: top;\n",
959
       "    }\n",
960
       "\n",
961
       "    .dataframe thead th {\n",
962
       "        text-align: right;\n",
963
       "    }\n",
964
       "</style>\n",
965
       "<table border=\"1\" class=\"dataframe\">\n",
966
       "  <thead>\n",
967
       "    <tr style=\"text-align: right;\">\n",
968
       "      <th></th>\n",
969
       "      <th>movie_id</th>\n",
970
       "      <th>title</th>\n",
971
       "      <th>overview</th>\n",
972
       "      <th>genres</th>\n",
973
       "      <th>keywords</th>\n",
974
       "      <th>cast</th>\n",
975
       "      <th>crew</th>\n",
976
       "    </tr>\n",
977
       "  </thead>\n",
978
       "  <tbody>\n",
979
       "    <tr>\n",
980
       "      <th>0</th>\n",
981
       "      <td>19995</td>\n",
982
       "      <td>Avatar</td>\n",
983
       "      <td>In the 22nd century, a paraplegic Marine is di...</td>\n",
984
       "      <td>[Action, Adventure, Fantasy, Science Fiction]</td>\n",
985
       "      <td>[{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...</td>\n",
986
       "      <td>[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...</td>\n",
987
       "      <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n",
988
       "    </tr>\n",
989
       "    <tr>\n",
990
       "      <th>1</th>\n",
991
       "      <td>285</td>\n",
992
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
993
       "      <td>Captain Barbossa, long believed to be dead, ha...</td>\n",
994
       "      <td>[Adventure, Fantasy, Action]</td>\n",
995
       "      <td>[{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...</td>\n",
996
       "      <td>[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...</td>\n",
997
       "      <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n",
998
       "    </tr>\n",
999
       "    <tr>\n",
1000
       "      <th>2</th>\n",
1001
       "      <td>206647</td>\n",
1002
       "      <td>Spectre</td>\n",
1003
       "      <td>A cryptic message from Bond’s past sends him o...</td>\n",
1004
       "      <td>[Action, Adventure, Crime]</td>\n",
1005
       "      <td>[{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...</td>\n",
1006
       "      <td>[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...</td>\n",
1007
       "      <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n",
1008
       "    </tr>\n",
1009
       "    <tr>\n",
1010
       "      <th>3</th>\n",
1011
       "      <td>49026</td>\n",
1012
       "      <td>The Dark Knight Rises</td>\n",
1013
       "      <td>Following the death of District Attorney Harve...</td>\n",
1014
       "      <td>[Action, Crime, Drama, Thriller]</td>\n",
1015
       "      <td>[{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...</td>\n",
1016
       "      <td>[{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...</td>\n",
1017
       "      <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n",
1018
       "    </tr>\n",
1019
       "    <tr>\n",
1020
       "      <th>4</th>\n",
1021
       "      <td>49529</td>\n",
1022
       "      <td>John Carter</td>\n",
1023
       "      <td>John Carter is a war-weary, former military ca...</td>\n",
1024
       "      <td>[Action, Adventure, Science Fiction]</td>\n",
1025
       "      <td>[{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...</td>\n",
1026
       "      <td>[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...</td>\n",
1027
       "      <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n",
1028
       "    </tr>\n",
1029
       "  </tbody>\n",
1030
       "</table>\n",
1031
       "</div>"
1032
      ],
1033
      "text/plain": [
1034
       "   movie_id                                     title  \\\n",
1035
       "0     19995                                    Avatar   \n",
1036
       "1       285  Pirates of the Caribbean: At World's End   \n",
1037
       "2    206647                                   Spectre   \n",
1038
       "3     49026                     The Dark Knight Rises   \n",
1039
       "4     49529                               John Carter   \n",
1040
       "\n",
1041
       "                                            overview  \\\n",
1042
       "0  In the 22nd century, a paraplegic Marine is di...   \n",
1043
       "1  Captain Barbossa, long believed to be dead, ha...   \n",
1044
       "2  A cryptic message from Bond’s past sends him o...   \n",
1045
       "3  Following the death of District Attorney Harve...   \n",
1046
       "4  John Carter is a war-weary, former military ca...   \n",
1047
       "\n",
1048
       "                                          genres  \\\n",
1049
       "0  [Action, Adventure, Fantasy, Science Fiction]   \n",
1050
       "1                   [Adventure, Fantasy, Action]   \n",
1051
       "2                     [Action, Adventure, Crime]   \n",
1052
       "3               [Action, Crime, Drama, Thriller]   \n",
1053
       "4           [Action, Adventure, Science Fiction]   \n",
1054
       "\n",
1055
       "                                            keywords  \\\n",
1056
       "0  [{\"id\": 1463, \"name\": \"culture clash\"}, {\"id\":...   \n",
1057
       "1  [{\"id\": 270, \"name\": \"ocean\"}, {\"id\": 726, \"na...   \n",
1058
       "2  [{\"id\": 470, \"name\": \"spy\"}, {\"id\": 818, \"name...   \n",
1059
       "3  [{\"id\": 849, \"name\": \"dc comics\"}, {\"id\": 853,...   \n",
1060
       "4  [{\"id\": 818, \"name\": \"based on novel\"}, {\"id\":...   \n",
1061
       "\n",
1062
       "                                                cast  \\\n",
1063
       "0  [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...   \n",
1064
       "1  [{\"cast_id\": 4, \"character\": \"Captain Jack Spa...   \n",
1065
       "2  [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...   \n",
1066
       "3  [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...   \n",
1067
       "4  [{\"cast_id\": 5, \"character\": \"John Carter\", \"c...   \n",
1068
       "\n",
1069
       "                                                crew  \n",
1070
       "0  [{\"credit_id\": \"52fe48009251416c750aca23\", \"de...  \n",
1071
       "1  [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...  \n",
1072
       "2  [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...  \n",
1073
       "3  [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...  \n",
1074
       "4  [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...  "
1075
      ]
1076
     },
1077
     "execution_count": 16,
1078
     "metadata": {},
1079
     "output_type": "execute_result"
1080
    }
1081
   ],
1082
   "source": [
1083
    "movies['genres'] = movies['genres'].apply(convert)\n",
1084
    "movies.head()"
1085
   ]
1086
  },
1087
  {
1088
   "cell_type": "code",
1089
   "execution_count": 17,
1090
   "id": "d845d787",
1091
   "metadata": {},
1092
   "outputs": [
1093
    {
1094
     "data": {
1095
      "text/html": [
1096
       "<div>\n",
1097
       "<style scoped>\n",
1098
       "    .dataframe tbody tr th:only-of-type {\n",
1099
       "        vertical-align: middle;\n",
1100
       "    }\n",
1101
       "\n",
1102
       "    .dataframe tbody tr th {\n",
1103
       "        vertical-align: top;\n",
1104
       "    }\n",
1105
       "\n",
1106
       "    .dataframe thead th {\n",
1107
       "        text-align: right;\n",
1108
       "    }\n",
1109
       "</style>\n",
1110
       "<table border=\"1\" class=\"dataframe\">\n",
1111
       "  <thead>\n",
1112
       "    <tr style=\"text-align: right;\">\n",
1113
       "      <th></th>\n",
1114
       "      <th>movie_id</th>\n",
1115
       "      <th>title</th>\n",
1116
       "      <th>overview</th>\n",
1117
       "      <th>genres</th>\n",
1118
       "      <th>keywords</th>\n",
1119
       "      <th>cast</th>\n",
1120
       "      <th>crew</th>\n",
1121
       "    </tr>\n",
1122
       "  </thead>\n",
1123
       "  <tbody>\n",
1124
       "    <tr>\n",
1125
       "      <th>0</th>\n",
1126
       "      <td>19995</td>\n",
1127
       "      <td>Avatar</td>\n",
1128
       "      <td>In the 22nd century, a paraplegic Marine is di...</td>\n",
1129
       "      <td>[Action, Adventure, Fantasy, Science Fiction]</td>\n",
1130
       "      <td>[culture clash, future, space war, space colon...</td>\n",
1131
       "      <td>[{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...</td>\n",
1132
       "      <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n",
1133
       "    </tr>\n",
1134
       "    <tr>\n",
1135
       "      <th>1</th>\n",
1136
       "      <td>285</td>\n",
1137
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
1138
       "      <td>Captain Barbossa, long believed to be dead, ha...</td>\n",
1139
       "      <td>[Adventure, Fantasy, Action]</td>\n",
1140
       "      <td>[ocean, drug abuse, exotic island, east india ...</td>\n",
1141
       "      <td>[{\"cast_id\": 4, \"character\": \"Captain Jack Spa...</td>\n",
1142
       "      <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n",
1143
       "    </tr>\n",
1144
       "    <tr>\n",
1145
       "      <th>2</th>\n",
1146
       "      <td>206647</td>\n",
1147
       "      <td>Spectre</td>\n",
1148
       "      <td>A cryptic message from Bond’s past sends him o...</td>\n",
1149
       "      <td>[Action, Adventure, Crime]</td>\n",
1150
       "      <td>[spy, based on novel, secret agent, sequel, mi...</td>\n",
1151
       "      <td>[{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...</td>\n",
1152
       "      <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n",
1153
       "    </tr>\n",
1154
       "    <tr>\n",
1155
       "      <th>3</th>\n",
1156
       "      <td>49026</td>\n",
1157
       "      <td>The Dark Knight Rises</td>\n",
1158
       "      <td>Following the death of District Attorney Harve...</td>\n",
1159
       "      <td>[Action, Crime, Drama, Thriller]</td>\n",
1160
       "      <td>[dc comics, crime fighter, terrorist, secret i...</td>\n",
1161
       "      <td>[{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...</td>\n",
1162
       "      <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n",
1163
       "    </tr>\n",
1164
       "    <tr>\n",
1165
       "      <th>4</th>\n",
1166
       "      <td>49529</td>\n",
1167
       "      <td>John Carter</td>\n",
1168
       "      <td>John Carter is a war-weary, former military ca...</td>\n",
1169
       "      <td>[Action, Adventure, Science Fiction]</td>\n",
1170
       "      <td>[based on novel, mars, medallion, space travel...</td>\n",
1171
       "      <td>[{\"cast_id\": 5, \"character\": \"John Carter\", \"c...</td>\n",
1172
       "      <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n",
1173
       "    </tr>\n",
1174
       "  </tbody>\n",
1175
       "</table>\n",
1176
       "</div>"
1177
      ],
1178
      "text/plain": [
1179
       "   movie_id                                     title  \\\n",
1180
       "0     19995                                    Avatar   \n",
1181
       "1       285  Pirates of the Caribbean: At World's End   \n",
1182
       "2    206647                                   Spectre   \n",
1183
       "3     49026                     The Dark Knight Rises   \n",
1184
       "4     49529                               John Carter   \n",
1185
       "\n",
1186
       "                                            overview  \\\n",
1187
       "0  In the 22nd century, a paraplegic Marine is di...   \n",
1188
       "1  Captain Barbossa, long believed to be dead, ha...   \n",
1189
       "2  A cryptic message from Bond’s past sends him o...   \n",
1190
       "3  Following the death of District Attorney Harve...   \n",
1191
       "4  John Carter is a war-weary, former military ca...   \n",
1192
       "\n",
1193
       "                                          genres  \\\n",
1194
       "0  [Action, Adventure, Fantasy, Science Fiction]   \n",
1195
       "1                   [Adventure, Fantasy, Action]   \n",
1196
       "2                     [Action, Adventure, Crime]   \n",
1197
       "3               [Action, Crime, Drama, Thriller]   \n",
1198
       "4           [Action, Adventure, Science Fiction]   \n",
1199
       "\n",
1200
       "                                            keywords  \\\n",
1201
       "0  [culture clash, future, space war, space colon...   \n",
1202
       "1  [ocean, drug abuse, exotic island, east india ...   \n",
1203
       "2  [spy, based on novel, secret agent, sequel, mi...   \n",
1204
       "3  [dc comics, crime fighter, terrorist, secret i...   \n",
1205
       "4  [based on novel, mars, medallion, space travel...   \n",
1206
       "\n",
1207
       "                                                cast  \\\n",
1208
       "0  [{\"cast_id\": 242, \"character\": \"Jake Sully\", \"...   \n",
1209
       "1  [{\"cast_id\": 4, \"character\": \"Captain Jack Spa...   \n",
1210
       "2  [{\"cast_id\": 1, \"character\": \"James Bond\", \"cr...   \n",
1211
       "3  [{\"cast_id\": 2, \"character\": \"Bruce Wayne / Ba...   \n",
1212
       "4  [{\"cast_id\": 5, \"character\": \"John Carter\", \"c...   \n",
1213
       "\n",
1214
       "                                                crew  \n",
1215
       "0  [{\"credit_id\": \"52fe48009251416c750aca23\", \"de...  \n",
1216
       "1  [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...  \n",
1217
       "2  [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...  \n",
1218
       "3  [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...  \n",
1219
       "4  [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...  "
1220
      ]
1221
     },
1222
     "execution_count": 17,
1223
     "metadata": {},
1224
     "output_type": "execute_result"
1225
    }
1226
   ],
1227
   "source": [
1228
    "movies['keywords'] = movies['keywords'].apply(convert)\n",
1229
    "movies.head()"
1230
   ]
1231
  },
1232
  {
1233
   "cell_type": "code",
1234
   "execution_count": 18,
1235
   "id": "511b76bd",
1236
   "metadata": {},
1237
   "outputs": [
1238
    {
1239
     "data": {
1240
      "text/plain": [
1241
       "[{'id': 28, 'name': 'Action'},\n",
1242
       " {'id': 12, 'name': 'Adventure'},\n",
1243
       " {'id': 14, 'name': 'Fantasy'},\n",
1244
       " {'id': 878, 'name': 'Science Fiction'}]"
1245
      ]
1246
     },
1247
     "execution_count": 18,
1248
     "metadata": {},
1249
     "output_type": "execute_result"
1250
    }
1251
   ],
1252
   "source": [
1253
    "import ast\n",
1254
    "ast.literal_eval('[{\"id\": 28, \"name\": \"Action\"}, {\"id\": 12, \"name\": \"Adventure\"}, {\"id\": 14, \"name\": \"Fantasy\"}, {\"id\": 878, \"name\":\"Science Fiction\"}]')"
1255
   ]
1256
  },
1257
  {
1258
   "cell_type": "code",
1259
   "execution_count": 19,
1260
   "id": "7024e6a0",
1261
   "metadata": {},
1262
   "outputs": [],
1263
   "source": [
1264
    "def convert3(text, limit=3):\n",
1265
    "    \"\"\"Parse a stringified list of dicts and return the 'name' of the first `limit` (default 3) entries.\"\"\"\n",
1266
    "    names = []\n",
1267
    "    for position, entry in enumerate(ast.literal_eval(text)):\n",
1268
    "        if position >= limit:\n",
1269
    "            break  # later entries are never used, so stop scanning the list\n",
1270
    "        names.append(entry['name'])\n",
1271
    "    return names"
1272
   ]
1273
  },
1274
  {
1275
   "cell_type": "code",
1276
   "execution_count": 20,
1277
   "id": "dec536a8",
1278
   "metadata": {},
1279
   "outputs": [
1280
    {
1281
     "data": {
1282
      "text/html": [
1283
       "<div>\n",
1284
       "<style scoped>\n",
1285
       "    .dataframe tbody tr th:only-of-type {\n",
1286
       "        vertical-align: middle;\n",
1287
       "    }\n",
1288
       "\n",
1289
       "    .dataframe tbody tr th {\n",
1290
       "        vertical-align: top;\n",
1291
       "    }\n",
1292
       "\n",
1293
       "    .dataframe thead th {\n",
1294
       "        text-align: right;\n",
1295
       "    }\n",
1296
       "</style>\n",
1297
       "<table border=\"1\" class=\"dataframe\">\n",
1298
       "  <thead>\n",
1299
       "    <tr style=\"text-align: right;\">\n",
1300
       "      <th></th>\n",
1301
       "      <th>movie_id</th>\n",
1302
       "      <th>title</th>\n",
1303
       "      <th>overview</th>\n",
1304
       "      <th>genres</th>\n",
1305
       "      <th>keywords</th>\n",
1306
       "      <th>cast</th>\n",
1307
       "      <th>crew</th>\n",
1308
       "    </tr>\n",
1309
       "  </thead>\n",
1310
       "  <tbody>\n",
1311
       "    <tr>\n",
1312
       "      <th>0</th>\n",
1313
       "      <td>19995</td>\n",
1314
       "      <td>Avatar</td>\n",
1315
       "      <td>In the 22nd century, a paraplegic Marine is di...</td>\n",
1316
       "      <td>[Action, Adventure, Fantasy, Science Fiction]</td>\n",
1317
       "      <td>[culture clash, future, space war, space colon...</td>\n",
1318
       "      <td>[Sam Worthington, Zoe Saldana, Sigourney Weave...</td>\n",
1319
       "      <td>[{\"credit_id\": \"52fe48009251416c750aca23\", \"de...</td>\n",
1320
       "    </tr>\n",
1321
       "    <tr>\n",
1322
       "      <th>1</th>\n",
1323
       "      <td>285</td>\n",
1324
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
1325
       "      <td>Captain Barbossa, long believed to be dead, ha...</td>\n",
1326
       "      <td>[Adventure, Fantasy, Action]</td>\n",
1327
       "      <td>[ocean, drug abuse, exotic island, east india ...</td>\n",
1328
       "      <td>[Johnny Depp, Orlando Bloom, Keira Knightley, ...</td>\n",
1329
       "      <td>[{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...</td>\n",
1330
       "    </tr>\n",
1331
       "    <tr>\n",
1332
       "      <th>2</th>\n",
1333
       "      <td>206647</td>\n",
1334
       "      <td>Spectre</td>\n",
1335
       "      <td>A cryptic message from Bond’s past sends him o...</td>\n",
1336
       "      <td>[Action, Adventure, Crime]</td>\n",
1337
       "      <td>[spy, based on novel, secret agent, sequel, mi...</td>\n",
1338
       "      <td>[Daniel Craig, Christoph Waltz, Léa Seydoux, R...</td>\n",
1339
       "      <td>[{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...</td>\n",
1340
       "    </tr>\n",
1341
       "    <tr>\n",
1342
       "      <th>3</th>\n",
1343
       "      <td>49026</td>\n",
1344
       "      <td>The Dark Knight Rises</td>\n",
1345
       "      <td>Following the death of District Attorney Harve...</td>\n",
1346
       "      <td>[Action, Crime, Drama, Thriller]</td>\n",
1347
       "      <td>[dc comics, crime fighter, terrorist, secret i...</td>\n",
1348
       "      <td>[Christian Bale, Michael Caine, Gary Oldman, A...</td>\n",
1349
       "      <td>[{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...</td>\n",
1350
       "    </tr>\n",
1351
       "    <tr>\n",
1352
       "      <th>4</th>\n",
1353
       "      <td>49529</td>\n",
1354
       "      <td>John Carter</td>\n",
1355
       "      <td>John Carter is a war-weary, former military ca...</td>\n",
1356
       "      <td>[Action, Adventure, Science Fiction]</td>\n",
1357
       "      <td>[based on novel, mars, medallion, space travel...</td>\n",
1358
       "      <td>[Taylor Kitsch, Lynn Collins, Samantha Morton,...</td>\n",
1359
       "      <td>[{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...</td>\n",
1360
       "    </tr>\n",
1361
       "  </tbody>\n",
1362
       "</table>\n",
1363
       "</div>"
1364
      ],
1365
      "text/plain": [
1366
       "   movie_id                                     title  \\\n",
1367
       "0     19995                                    Avatar   \n",
1368
       "1       285  Pirates of the Caribbean: At World's End   \n",
1369
       "2    206647                                   Spectre   \n",
1370
       "3     49026                     The Dark Knight Rises   \n",
1371
       "4     49529                               John Carter   \n",
1372
       "\n",
1373
       "                                            overview  \\\n",
1374
       "0  In the 22nd century, a paraplegic Marine is di...   \n",
1375
       "1  Captain Barbossa, long believed to be dead, ha...   \n",
1376
       "2  A cryptic message from Bond’s past sends him o...   \n",
1377
       "3  Following the death of District Attorney Harve...   \n",
1378
       "4  John Carter is a war-weary, former military ca...   \n",
1379
       "\n",
1380
       "                                          genres  \\\n",
1381
       "0  [Action, Adventure, Fantasy, Science Fiction]   \n",
1382
       "1                   [Adventure, Fantasy, Action]   \n",
1383
       "2                     [Action, Adventure, Crime]   \n",
1384
       "3               [Action, Crime, Drama, Thriller]   \n",
1385
       "4           [Action, Adventure, Science Fiction]   \n",
1386
       "\n",
1387
       "                                            keywords  \\\n",
1388
       "0  [culture clash, future, space war, space colon...   \n",
1389
       "1  [ocean, drug abuse, exotic island, east india ...   \n",
1390
       "2  [spy, based on novel, secret agent, sequel, mi...   \n",
1391
       "3  [dc comics, crime fighter, terrorist, secret i...   \n",
1392
       "4  [based on novel, mars, medallion, space travel...   \n",
1393
       "\n",
1394
       "                                                cast  \\\n",
1395
       "0  [Sam Worthington, Zoe Saldana, Sigourney Weave...   \n",
1396
       "1  [Johnny Depp, Orlando Bloom, Keira Knightley, ...   \n",
1397
       "2  [Daniel Craig, Christoph Waltz, Léa Seydoux, R...   \n",
1398
       "3  [Christian Bale, Michael Caine, Gary Oldman, A...   \n",
1399
       "4  [Taylor Kitsch, Lynn Collins, Samantha Morton,...   \n",
1400
       "\n",
1401
       "                                                crew  \n",
1402
       "0  [{\"credit_id\": \"52fe48009251416c750aca23\", \"de...  \n",
1403
       "1  [{\"credit_id\": \"52fe4232c3a36847f800b579\", \"de...  \n",
1404
       "2  [{\"credit_id\": \"54805967c3a36829b5002c41\", \"de...  \n",
1405
       "3  [{\"credit_id\": \"52fe4781c3a36847f81398c3\", \"de...  \n",
1406
       "4  [{\"credit_id\": \"52fe479ac3a36847f813eaa3\", \"de...  "
1407
      ]
1408
     },
1409
     "execution_count": 20,
1410
     "metadata": {},
1411
     "output_type": "execute_result"
1412
    }
1413
   ],
1414
   "source": [
1415
    "movies['cast'] = movies['cast'].apply(convert)\n",
1416
    "movies.head()"
1417
   ]
1418
  },
1419
  {
1420
   "cell_type": "code",
1421
   "execution_count": 21,
1422
   "id": "ee640520",
1423
   "metadata": {},
1424
   "outputs": [],
1425
   "source": [
1426
    "movies['cast'] = movies['cast'].apply(lambda x:x[0:3])"
1427
   ]
1428
  },
1429
  {
1430
   "cell_type": "code",
1431
   "execution_count": 22,
1432
   "id": "7597779d",
1433
   "metadata": {},
1434
   "outputs": [],
1435
   "source": [
1436
    "def fetch_director(text):\n",
1437
    "    L = []\n",
1438
    "    for i in ast.literal_eval(text):\n",
1439
    "        if i['job'] == 'Director': \n",
1440
    "            L.append(i['name'])\n",
1441
    "    return L "
1442
   ]
1443
  },
1444
  {
1445
   "cell_type": "code",
1446
   "execution_count": 23,
1447
   "id": "913c4480",
1448
   "metadata": {},
1449
   "outputs": [],
1450
   "source": [
1451
    "movies['crew'] = movies['crew'].apply(fetch_director)"
1452
   ]
1453
  },
1454
  {
1455
   "cell_type": "code",
1456
   "execution_count": 24,
1457
   "id": "d87dcda7",
1458
   "metadata": {},
1459
   "outputs": [
1460
    {
1461
     "data": {
1462
      "text/html": [
1463
       "<div>\n",
1464
       "<style scoped>\n",
1465
       "    .dataframe tbody tr th:only-of-type {\n",
1466
       "        vertical-align: middle;\n",
1467
       "    }\n",
1468
       "\n",
1469
       "    .dataframe tbody tr th {\n",
1470
       "        vertical-align: top;\n",
1471
       "    }\n",
1472
       "\n",
1473
       "    .dataframe thead th {\n",
1474
       "        text-align: right;\n",
1475
       "    }\n",
1476
       "</style>\n",
1477
       "<table border=\"1\" class=\"dataframe\">\n",
1478
       "  <thead>\n",
1479
       "    <tr style=\"text-align: right;\">\n",
1480
       "      <th></th>\n",
1481
       "      <th>movie_id</th>\n",
1482
       "      <th>title</th>\n",
1483
       "      <th>overview</th>\n",
1484
       "      <th>genres</th>\n",
1485
       "      <th>keywords</th>\n",
1486
       "      <th>cast</th>\n",
1487
       "      <th>crew</th>\n",
1488
       "    </tr>\n",
1489
       "  </thead>\n",
1490
       "  <tbody>\n",
1491
       "    <tr>\n",
1492
       "      <th>4702</th>\n",
1493
       "      <td>79120</td>\n",
1494
       "      <td>Weekend</td>\n",
1495
       "      <td>After a drunken house party with his straight ...</td>\n",
1496
       "      <td>[Drama, Romance]</td>\n",
1497
       "      <td>[gay, great britain, one-night stand, independ...</td>\n",
1498
       "      <td>[Tom Cullen, Chris New, Jonathan Race]</td>\n",
1499
       "      <td>[Andrew Haigh]</td>\n",
1500
       "    </tr>\n",
1501
       "    <tr>\n",
1502
       "      <th>1341</th>\n",
1503
       "      <td>8470</td>\n",
1504
       "      <td>John Q</td>\n",
1505
       "      <td>John Quincy Archibald is a father and husband ...</td>\n",
1506
       "      <td>[Drama, Thriller, Crime]</td>\n",
1507
       "      <td>[father son relationship, chicago, heart attac...</td>\n",
1508
       "      <td>[Denzel Washington, Robert Duvall, Anne Heche]</td>\n",
1509
       "      <td>[Nick Cassavetes]</td>\n",
1510
       "    </tr>\n",
1511
       "    <tr>\n",
1512
       "      <th>3758</th>\n",
1513
       "      <td>8357</td>\n",
1514
       "      <td>What the #$*! Do We (K)now!?</td>\n",
1515
       "      <td>Amanda (Marlee Maitlin) is a divorced woman wh...</td>\n",
1516
       "      <td>[Documentary]</td>\n",
1517
       "      <td>[alternate dimension, new age, parallel world,...</td>\n",
1518
       "      <td>[Marlee Matlin, Elaine Hendrix, Robert Blanche]</td>\n",
1519
       "      <td>[William Arntz, Betsy Chasse]</td>\n",
1520
       "    </tr>\n",
1521
       "    <tr>\n",
1522
       "      <th>2423</th>\n",
1523
       "      <td>9792</td>\n",
1524
       "      <td>The Hills Have Eyes</td>\n",
1525
       "      <td>Based on Wes Craven's 1977 suspenseful cult cl...</td>\n",
1526
       "      <td>[Horror, Thriller]</td>\n",
1527
       "      <td>[ambush, new mexico, van, family holiday, axe ...</td>\n",
1528
       "      <td>[Aaron Stanford, Kathleen Quinlan, Vinessa Shaw]</td>\n",
1529
       "      <td>[Alexandre Aja]</td>\n",
1530
       "    </tr>\n",
1531
       "    <tr>\n",
1532
       "      <th>297</th>\n",
1533
       "      <td>1372</td>\n",
1534
       "      <td>Blood Diamond</td>\n",
1535
       "      <td>An ex-mercenary turned smuggler. A Mende fishe...</td>\n",
1536
       "      <td>[Drama, Thriller, Action]</td>\n",
1537
       "      <td>[rebel, journalist, journalism, loss of family...</td>\n",
1538
       "      <td>[Leonardo DiCaprio, Djimon Hounsou, Jennifer C...</td>\n",
1539
       "      <td>[Edward Zwick]</td>\n",
1540
       "    </tr>\n",
1541
       "  </tbody>\n",
1542
       "</table>\n",
1543
       "</div>"
1544
      ],
1545
      "text/plain": [
1546
       "      movie_id                         title  \\\n",
1547
       "4702     79120                       Weekend   \n",
1548
       "1341      8470                        John Q   \n",
1549
       "3758      8357  What the #$*! Do We (K)now!?   \n",
1550
       "2423      9792           The Hills Have Eyes   \n",
1551
       "297       1372                 Blood Diamond   \n",
1552
       "\n",
1553
       "                                               overview  \\\n",
1554
       "4702  After a drunken house party with his straight ...   \n",
1555
       "1341  John Quincy Archibald is a father and husband ...   \n",
1556
       "3758  Amanda (Marlee Maitlin) is a divorced woman wh...   \n",
1557
       "2423  Based on Wes Craven's 1977 suspenseful cult cl...   \n",
1558
       "297   An ex-mercenary turned smuggler. A Mende fishe...   \n",
1559
       "\n",
1560
       "                         genres  \\\n",
1561
       "4702           [Drama, Romance]   \n",
1562
       "1341   [Drama, Thriller, Crime]   \n",
1563
       "3758              [Documentary]   \n",
1564
       "2423         [Horror, Thriller]   \n",
1565
       "297   [Drama, Thriller, Action]   \n",
1566
       "\n",
1567
       "                                               keywords  \\\n",
1568
       "4702  [gay, great britain, one-night stand, independ...   \n",
1569
       "1341  [father son relationship, chicago, heart attac...   \n",
1570
       "3758  [alternate dimension, new age, parallel world,...   \n",
1571
       "2423  [ambush, new mexico, van, family holiday, axe ...   \n",
1572
       "297   [rebel, journalist, journalism, loss of family...   \n",
1573
       "\n",
1574
       "                                                   cast  \\\n",
1575
       "4702             [Tom Cullen, Chris New, Jonathan Race]   \n",
1576
       "1341     [Denzel Washington, Robert Duvall, Anne Heche]   \n",
1577
       "3758    [Marlee Matlin, Elaine Hendrix, Robert Blanche]   \n",
1578
       "2423   [Aaron Stanford, Kathleen Quinlan, Vinessa Shaw]   \n",
1579
       "297   [Leonardo DiCaprio, Djimon Hounsou, Jennifer C...   \n",
1580
       "\n",
1581
       "                               crew  \n",
1582
       "4702                 [Andrew Haigh]  \n",
1583
       "1341              [Nick Cassavetes]  \n",
1584
       "3758  [William Arntz, Betsy Chasse]  \n",
1585
       "2423                [Alexandre Aja]  \n",
1586
       "297                  [Edward Zwick]  "
1587
      ]
1588
     },
1589
     "execution_count": 24,
1590
     "metadata": {},
1591
     "output_type": "execute_result"
1592
    }
1593
   ],
1594
   "source": [
1595
    "#movies['overview'] = movies['overview'].apply(lambda x:x.split())\n",
1596
    "movies.sample(5)"
1597
   ]
1598
  },
1599
  {
1600
   "cell_type": "code",
1601
   "execution_count": 25,
1602
   "id": "77acdc21",
1603
   "metadata": {},
1604
   "outputs": [],
1605
   "source": [
1606
    "def collapse(L):\n",
1607
    "    L1 = []\n",
1608
    "    for i in L:\n",
1609
    "        L1.append(i.replace(\" \",\"\"))\n",
1610
    "    return L1"
1611
   ]
1612
  },
1613
  {
1614
   "cell_type": "code",
1615
   "execution_count": 26,
1616
   "id": "b3439e7c",
1617
   "metadata": {},
1618
   "outputs": [],
1619
   "source": [
1620
    "movies['cast'] = movies['cast'].apply(collapse)\n",
1621
    "movies['crew'] = movies['crew'].apply(collapse)\n",
1622
    "movies['genres'] = movies['genres'].apply(collapse)\n",
1623
    "movies['keywords'] = movies['keywords'].apply(collapse)"
1624
   ]
1625
  },
1626
  {
1627
   "cell_type": "code",
1628
   "execution_count": 27,
1629
   "id": "5684929f",
1630
   "metadata": {},
1631
   "outputs": [
1632
    {
1633
     "data": {
1634
      "text/html": [
1635
       "<div>\n",
1636
       "<style scoped>\n",
1637
       "    .dataframe tbody tr th:only-of-type {\n",
1638
       "        vertical-align: middle;\n",
1639
       "    }\n",
1640
       "\n",
1641
       "    .dataframe tbody tr th {\n",
1642
       "        vertical-align: top;\n",
1643
       "    }\n",
1644
       "\n",
1645
       "    .dataframe thead th {\n",
1646
       "        text-align: right;\n",
1647
       "    }\n",
1648
       "</style>\n",
1649
       "<table border=\"1\" class=\"dataframe\">\n",
1650
       "  <thead>\n",
1651
       "    <tr style=\"text-align: right;\">\n",
1652
       "      <th></th>\n",
1653
       "      <th>movie_id</th>\n",
1654
       "      <th>title</th>\n",
1655
       "      <th>overview</th>\n",
1656
       "      <th>genres</th>\n",
1657
       "      <th>keywords</th>\n",
1658
       "      <th>cast</th>\n",
1659
       "      <th>crew</th>\n",
1660
       "    </tr>\n",
1661
       "  </thead>\n",
1662
       "  <tbody>\n",
1663
       "    <tr>\n",
1664
       "      <th>0</th>\n",
1665
       "      <td>19995</td>\n",
1666
       "      <td>Avatar</td>\n",
1667
       "      <td>In the 22nd century, a paraplegic Marine is di...</td>\n",
1668
       "      <td>[Action, Adventure, Fantasy, ScienceFiction]</td>\n",
1669
       "      <td>[cultureclash, future, spacewar, spacecolony, ...</td>\n",
1670
       "      <td>[SamWorthington, ZoeSaldana, SigourneyWeaver]</td>\n",
1671
       "      <td>[JamesCameron]</td>\n",
1672
       "    </tr>\n",
1673
       "    <tr>\n",
1674
       "      <th>1</th>\n",
1675
       "      <td>285</td>\n",
1676
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
1677
       "      <td>Captain Barbossa, long believed to be dead, ha...</td>\n",
1678
       "      <td>[Adventure, Fantasy, Action]</td>\n",
1679
       "      <td>[ocean, drugabuse, exoticisland, eastindiatrad...</td>\n",
1680
       "      <td>[JohnnyDepp, OrlandoBloom, KeiraKnightley]</td>\n",
1681
       "      <td>[GoreVerbinski]</td>\n",
1682
       "    </tr>\n",
1683
       "    <tr>\n",
1684
       "      <th>2</th>\n",
1685
       "      <td>206647</td>\n",
1686
       "      <td>Spectre</td>\n",
1687
       "      <td>A cryptic message from Bond’s past sends him o...</td>\n",
1688
       "      <td>[Action, Adventure, Crime]</td>\n",
1689
       "      <td>[spy, basedonnovel, secretagent, sequel, mi6, ...</td>\n",
1690
       "      <td>[DanielCraig, ChristophWaltz, LéaSeydoux]</td>\n",
1691
       "      <td>[SamMendes]</td>\n",
1692
       "    </tr>\n",
1693
       "    <tr>\n",
1694
       "      <th>3</th>\n",
1695
       "      <td>49026</td>\n",
1696
       "      <td>The Dark Knight Rises</td>\n",
1697
       "      <td>Following the death of District Attorney Harve...</td>\n",
1698
       "      <td>[Action, Crime, Drama, Thriller]</td>\n",
1699
       "      <td>[dccomics, crimefighter, terrorist, secretiden...</td>\n",
1700
       "      <td>[ChristianBale, MichaelCaine, GaryOldman]</td>\n",
1701
       "      <td>[ChristopherNolan]</td>\n",
1702
       "    </tr>\n",
1703
       "    <tr>\n",
1704
       "      <th>4</th>\n",
1705
       "      <td>49529</td>\n",
1706
       "      <td>John Carter</td>\n",
1707
       "      <td>John Carter is a war-weary, former military ca...</td>\n",
1708
       "      <td>[Action, Adventure, ScienceFiction]</td>\n",
1709
       "      <td>[basedonnovel, mars, medallion, spacetravel, p...</td>\n",
1710
       "      <td>[TaylorKitsch, LynnCollins, SamanthaMorton]</td>\n",
1711
       "      <td>[AndrewStanton]</td>\n",
1712
       "    </tr>\n",
1713
       "  </tbody>\n",
1714
       "</table>\n",
1715
       "</div>"
1716
      ],
1717
      "text/plain": [
1718
       "   movie_id                                     title  \\\n",
1719
       "0     19995                                    Avatar   \n",
1720
       "1       285  Pirates of the Caribbean: At World's End   \n",
1721
       "2    206647                                   Spectre   \n",
1722
       "3     49026                     The Dark Knight Rises   \n",
1723
       "4     49529                               John Carter   \n",
1724
       "\n",
1725
       "                                            overview  \\\n",
1726
       "0  In the 22nd century, a paraplegic Marine is di...   \n",
1727
       "1  Captain Barbossa, long believed to be dead, ha...   \n",
1728
       "2  A cryptic message from Bond’s past sends him o...   \n",
1729
       "3  Following the death of District Attorney Harve...   \n",
1730
       "4  John Carter is a war-weary, former military ca...   \n",
1731
       "\n",
1732
       "                                         genres  \\\n",
1733
       "0  [Action, Adventure, Fantasy, ScienceFiction]   \n",
1734
       "1                  [Adventure, Fantasy, Action]   \n",
1735
       "2                    [Action, Adventure, Crime]   \n",
1736
       "3              [Action, Crime, Drama, Thriller]   \n",
1737
       "4           [Action, Adventure, ScienceFiction]   \n",
1738
       "\n",
1739
       "                                            keywords  \\\n",
1740
       "0  [cultureclash, future, spacewar, spacecolony, ...   \n",
1741
       "1  [ocean, drugabuse, exoticisland, eastindiatrad...   \n",
1742
       "2  [spy, basedonnovel, secretagent, sequel, mi6, ...   \n",
1743
       "3  [dccomics, crimefighter, terrorist, secretiden...   \n",
1744
       "4  [basedonnovel, mars, medallion, spacetravel, p...   \n",
1745
       "\n",
1746
       "                                            cast                crew  \n",
1747
       "0  [SamWorthington, ZoeSaldana, SigourneyWeaver]      [JamesCameron]  \n",
1748
       "1     [JohnnyDepp, OrlandoBloom, KeiraKnightley]     [GoreVerbinski]  \n",
1749
       "2      [DanielCraig, ChristophWaltz, LéaSeydoux]         [SamMendes]  \n",
1750
       "3      [ChristianBale, MichaelCaine, GaryOldman]  [ChristopherNolan]  \n",
1751
       "4    [TaylorKitsch, LynnCollins, SamanthaMorton]     [AndrewStanton]  "
1752
      ]
1753
     },
1754
     "execution_count": 27,
1755
     "metadata": {},
1756
     "output_type": "execute_result"
1757
    }
1758
   ],
1759
   "source": [
1760
    "movies.head()"
1761
   ]
1762
  },
1763
  {
1764
   "cell_type": "code",
1765
   "execution_count": 28,
1766
   "id": "25b16cae",
1767
   "metadata": {},
1768
   "outputs": [],
1769
   "source": [
1770
    "movies['overview'] = movies['overview'].apply(lambda x:x.split())"
1771
   ]
1772
  },
1773
  {
1774
   "cell_type": "code",
1775
   "execution_count": 29,
1776
   "id": "a4cf3ba0",
1777
   "metadata": {},
1778
   "outputs": [],
1779
   "source": [
1780
    "movies['tags'] = movies['overview'] + movies['genres'] + movies['keywords'] + movies['cast'] + movies['crew']"
1781
   ]
1782
  },
1783
  {
1784
   "cell_type": "code",
1785
   "execution_count": 30,
1786
   "id": "5b85a9ee",
1787
   "metadata": {},
1788
   "outputs": [
1789
    {
1790
     "data": {
1791
      "text/html": [
1792
       "<div>\n",
1793
       "<style scoped>\n",
1794
       "    .dataframe tbody tr th:only-of-type {\n",
1795
       "        vertical-align: middle;\n",
1796
       "    }\n",
1797
       "\n",
1798
       "    .dataframe tbody tr th {\n",
1799
       "        vertical-align: top;\n",
1800
       "    }\n",
1801
       "\n",
1802
       "    .dataframe thead th {\n",
1803
       "        text-align: right;\n",
1804
       "    }\n",
1805
       "</style>\n",
1806
       "<table border=\"1\" class=\"dataframe\">\n",
1807
       "  <thead>\n",
1808
       "    <tr style=\"text-align: right;\">\n",
1809
       "      <th></th>\n",
1810
       "      <th>movie_id</th>\n",
1811
       "      <th>title</th>\n",
1812
       "      <th>overview</th>\n",
1813
       "      <th>genres</th>\n",
1814
       "      <th>keywords</th>\n",
1815
       "      <th>cast</th>\n",
1816
       "      <th>crew</th>\n",
1817
       "      <th>tags</th>\n",
1818
       "    </tr>\n",
1819
       "  </thead>\n",
1820
       "  <tbody>\n",
1821
       "    <tr>\n",
1822
       "      <th>0</th>\n",
1823
       "      <td>19995</td>\n",
1824
       "      <td>Avatar</td>\n",
1825
       "      <td>[In, the, 22nd, century,, a, paraplegic, Marin...</td>\n",
1826
       "      <td>[Action, Adventure, Fantasy, ScienceFiction]</td>\n",
1827
       "      <td>[cultureclash, future, spacewar, spacecolony, ...</td>\n",
1828
       "      <td>[SamWorthington, ZoeSaldana, SigourneyWeaver]</td>\n",
1829
       "      <td>[JamesCameron]</td>\n",
1830
       "      <td>[In, the, 22nd, century,, a, paraplegic, Marin...</td>\n",
1831
       "    </tr>\n",
1832
       "    <tr>\n",
1833
       "      <th>1</th>\n",
1834
       "      <td>285</td>\n",
1835
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
1836
       "      <td>[Captain, Barbossa,, long, believed, to, be, d...</td>\n",
1837
       "      <td>[Adventure, Fantasy, Action]</td>\n",
1838
       "      <td>[ocean, drugabuse, exoticisland, eastindiatrad...</td>\n",
1839
       "      <td>[JohnnyDepp, OrlandoBloom, KeiraKnightley]</td>\n",
1840
       "      <td>[GoreVerbinski]</td>\n",
1841
       "      <td>[Captain, Barbossa,, long, believed, to, be, d...</td>\n",
1842
       "    </tr>\n",
1843
       "    <tr>\n",
1844
       "      <th>2</th>\n",
1845
       "      <td>206647</td>\n",
1846
       "      <td>Spectre</td>\n",
1847
       "      <td>[A, cryptic, message, from, Bond’s, past, send...</td>\n",
1848
       "      <td>[Action, Adventure, Crime]</td>\n",
1849
       "      <td>[spy, basedonnovel, secretagent, sequel, mi6, ...</td>\n",
1850
       "      <td>[DanielCraig, ChristophWaltz, LéaSeydoux]</td>\n",
1851
       "      <td>[SamMendes]</td>\n",
1852
       "      <td>[A, cryptic, message, from, Bond’s, past, send...</td>\n",
1853
       "    </tr>\n",
1854
       "    <tr>\n",
1855
       "      <th>3</th>\n",
1856
       "      <td>49026</td>\n",
1857
       "      <td>The Dark Knight Rises</td>\n",
1858
       "      <td>[Following, the, death, of, District, Attorney...</td>\n",
1859
       "      <td>[Action, Crime, Drama, Thriller]</td>\n",
1860
       "      <td>[dccomics, crimefighter, terrorist, secretiden...</td>\n",
1861
       "      <td>[ChristianBale, MichaelCaine, GaryOldman]</td>\n",
1862
       "      <td>[ChristopherNolan]</td>\n",
1863
       "      <td>[Following, the, death, of, District, Attorney...</td>\n",
1864
       "    </tr>\n",
1865
       "    <tr>\n",
1866
       "      <th>4</th>\n",
1867
       "      <td>49529</td>\n",
1868
       "      <td>John Carter</td>\n",
1869
       "      <td>[John, Carter, is, a, war-weary,, former, mili...</td>\n",
1870
       "      <td>[Action, Adventure, ScienceFiction]</td>\n",
1871
       "      <td>[basedonnovel, mars, medallion, spacetravel, p...</td>\n",
1872
       "      <td>[TaylorKitsch, LynnCollins, SamanthaMorton]</td>\n",
1873
       "      <td>[AndrewStanton]</td>\n",
1874
       "      <td>[John, Carter, is, a, war-weary,, former, mili...</td>\n",
1875
       "    </tr>\n",
1876
       "  </tbody>\n",
1877
       "</table>\n",
1878
       "</div>"
1879
      ],
1880
      "text/plain": [
1881
       "   movie_id                                     title  \\\n",
1882
       "0     19995                                    Avatar   \n",
1883
       "1       285  Pirates of the Caribbean: At World's End   \n",
1884
       "2    206647                                   Spectre   \n",
1885
       "3     49026                     The Dark Knight Rises   \n",
1886
       "4     49529                               John Carter   \n",
1887
       "\n",
1888
       "                                            overview  \\\n",
1889
       "0  [In, the, 22nd, century,, a, paraplegic, Marin...   \n",
1890
       "1  [Captain, Barbossa,, long, believed, to, be, d...   \n",
1891
       "2  [A, cryptic, message, from, Bond’s, past, send...   \n",
1892
       "3  [Following, the, death, of, District, Attorney...   \n",
1893
       "4  [John, Carter, is, a, war-weary,, former, mili...   \n",
1894
       "\n",
1895
       "                                         genres  \\\n",
1896
       "0  [Action, Adventure, Fantasy, ScienceFiction]   \n",
1897
       "1                  [Adventure, Fantasy, Action]   \n",
1898
       "2                    [Action, Adventure, Crime]   \n",
1899
       "3              [Action, Crime, Drama, Thriller]   \n",
1900
       "4           [Action, Adventure, ScienceFiction]   \n",
1901
       "\n",
1902
       "                                            keywords  \\\n",
1903
       "0  [cultureclash, future, spacewar, spacecolony, ...   \n",
1904
       "1  [ocean, drugabuse, exoticisland, eastindiatrad...   \n",
1905
       "2  [spy, basedonnovel, secretagent, sequel, mi6, ...   \n",
1906
       "3  [dccomics, crimefighter, terrorist, secretiden...   \n",
1907
       "4  [basedonnovel, mars, medallion, spacetravel, p...   \n",
1908
       "\n",
1909
       "                                            cast                crew  \\\n",
1910
       "0  [SamWorthington, ZoeSaldana, SigourneyWeaver]      [JamesCameron]   \n",
1911
       "1     [JohnnyDepp, OrlandoBloom, KeiraKnightley]     [GoreVerbinski]   \n",
1912
       "2      [DanielCraig, ChristophWaltz, LéaSeydoux]         [SamMendes]   \n",
1913
       "3      [ChristianBale, MichaelCaine, GaryOldman]  [ChristopherNolan]   \n",
1914
       "4    [TaylorKitsch, LynnCollins, SamanthaMorton]     [AndrewStanton]   \n",
1915
       "\n",
1916
       "                                                tags  \n",
1917
       "0  [In, the, 22nd, century,, a, paraplegic, Marin...  \n",
1918
       "1  [Captain, Barbossa,, long, believed, to, be, d...  \n",
1919
       "2  [A, cryptic, message, from, Bond’s, past, send...  \n",
1920
       "3  [Following, the, death, of, District, Attorney...  \n",
1921
       "4  [John, Carter, is, a, war-weary,, former, mili...  "
1922
      ]
1923
     },
1924
     "execution_count": 30,
1925
     "metadata": {},
1926
     "output_type": "execute_result"
1927
    }
1928
   ],
1929
   "source": [
1930
    "movies.head()"
1931
   ]
1932
  },
1933
  {
1934
   "cell_type": "code",
1935
   "execution_count": 31,
1936
   "id": "c8e5db66",
1937
   "metadata": {},
1938
   "outputs": [],
1939
   "source": [
1940
    "new = movies.drop(columns=['overview','genres','keywords','cast','crew'])\n",
1941
    "#new.head()"
1942
   ]
1943
  },
1944
  {
1945
   "cell_type": "code",
1946
   "execution_count": 32,
1947
   "id": "86337c80",
1948
   "metadata": {},
1949
   "outputs": [
1950
    {
1951
     "data": {
1952
      "text/html": [
1953
       "<div>\n",
1954
       "<style scoped>\n",
1955
       "    .dataframe tbody tr th:only-of-type {\n",
1956
       "        vertical-align: middle;\n",
1957
       "    }\n",
1958
       "\n",
1959
       "    .dataframe tbody tr th {\n",
1960
       "        vertical-align: top;\n",
1961
       "    }\n",
1962
       "\n",
1963
       "    .dataframe thead th {\n",
1964
       "        text-align: right;\n",
1965
       "    }\n",
1966
       "</style>\n",
1967
       "<table border=\"1\" class=\"dataframe\">\n",
1968
       "  <thead>\n",
1969
       "    <tr style=\"text-align: right;\">\n",
1970
       "      <th></th>\n",
1971
       "      <th>movie_id</th>\n",
1972
       "      <th>title</th>\n",
1973
       "      <th>tags</th>\n",
1974
       "    </tr>\n",
1975
       "  </thead>\n",
1976
       "  <tbody>\n",
1977
       "    <tr>\n",
1978
       "      <th>0</th>\n",
1979
       "      <td>19995</td>\n",
1980
       "      <td>Avatar</td>\n",
1981
       "      <td>In the 22nd century, a paraplegic Marine is di...</td>\n",
1982
       "    </tr>\n",
1983
       "    <tr>\n",
1984
       "      <th>1</th>\n",
1985
       "      <td>285</td>\n",
1986
       "      <td>Pirates of the Caribbean: At World's End</td>\n",
1987
       "      <td>Captain Barbossa, long believed to be dead, ha...</td>\n",
1988
       "    </tr>\n",
1989
       "    <tr>\n",
1990
       "      <th>2</th>\n",
1991
       "      <td>206647</td>\n",
1992
       "      <td>Spectre</td>\n",
1993
       "      <td>A cryptic message from Bond’s past sends him o...</td>\n",
1994
       "    </tr>\n",
1995
       "    <tr>\n",
1996
       "      <th>3</th>\n",
1997
       "      <td>49026</td>\n",
1998
       "      <td>The Dark Knight Rises</td>\n",
1999
       "      <td>Following the death of District Attorney Harve...</td>\n",
2000
       "    </tr>\n",
2001
       "    <tr>\n",
2002
       "      <th>4</th>\n",
2003
       "      <td>49529</td>\n",
2004
       "      <td>John Carter</td>\n",
2005
       "      <td>John Carter is a war-weary, former military ca...</td>\n",
2006
       "    </tr>\n",
2007
       "  </tbody>\n",
2008
       "</table>\n",
2009
       "</div>"
2010
      ],
2011
      "text/plain": [
2012
       "   movie_id                                     title  \\\n",
2013
       "0     19995                                    Avatar   \n",
2014
       "1       285  Pirates of the Caribbean: At World's End   \n",
2015
       "2    206647                                   Spectre   \n",
2016
       "3     49026                     The Dark Knight Rises   \n",
2017
       "4     49529                               John Carter   \n",
2018
       "\n",
2019
       "                                                tags  \n",
2020
       "0  In the 22nd century, a paraplegic Marine is di...  \n",
2021
       "1  Captain Barbossa, long believed to be dead, ha...  \n",
2022
       "2  A cryptic message from Bond’s past sends him o...  \n",
2023
       "3  Following the death of District Attorney Harve...  \n",
2024
       "4  John Carter is a war-weary, former military ca...  "
2025
      ]
2026
     },
2027
     "execution_count": 32,
2028
     "metadata": {},
2029
     "output_type": "execute_result"
2030
    }
2031
   ],
2032
   "source": [
2033
    "new['tags'] = new['tags'].apply(lambda x: \" \".join(x))\n",
2034
    "new.head()"
2035
   ]
2036
  },
2037
  {
2038
   "cell_type": "code",
2039
   "execution_count": 33,
2040
   "id": "b2096072",
2041
   "metadata": {},
2042
   "outputs": [],
2043
   "source": [
2044
    "from sklearn.feature_extraction.text import CountVectorizer\n",
2045
    "cv = CountVectorizer(max_features=5000,stop_words='english')"
2046
   ]
2047
  },
2048
  {
2049
   "cell_type": "code",
2050
   "execution_count": 34,
2051
   "id": "78653bfc",
2052
   "metadata": {},
2053
   "outputs": [],
2054
   "source": [
2055
    "vector = cv.fit_transform(new['tags']).toarray()"
2056
   ]
2057
  },
2058
  {
2059
   "cell_type": "code",
2060
   "execution_count": 35,
2061
   "id": "90f36092",
2062
   "metadata": {},
2063
   "outputs": [
2064
    {
2065
     "data": {
2066
      "text/plain": [
2067
       "array([[0, 0, 0, ..., 0, 0, 0],\n",
2068
       "       [0, 0, 0, ..., 0, 0, 0],\n",
2069
       "       [0, 0, 0, ..., 0, 0, 0],\n",
2070
       "       ...,\n",
2071
       "       [0, 0, 0, ..., 0, 0, 0],\n",
2072
       "       [0, 0, 0, ..., 0, 0, 0],\n",
2073
       "       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)"
2074
      ]
2075
     },
2076
     "execution_count": 35,
2077
     "metadata": {},
2078
     "output_type": "execute_result"
2079
    }
2080
   ],
2081
   "source": [
2082
    "vector"
2083
   ]
2084
  },
2085
  {
2086
   "cell_type": "code",
2087
   "execution_count": 36,
2088
   "id": "e4a28860",
2089
   "metadata": {},
2090
   "outputs": [
2091
    {
2092
     "data": {
2093
      "text/plain": [
2094
       "array([0, 0, 0, ..., 0, 0, 0], dtype=int64)"
2095
      ]
2096
     },
2097
     "execution_count": 36,
2098
     "metadata": {},
2099
     "output_type": "execute_result"
2100
    }
2101
   ],
2102
   "source": [
2103
    "vector[0]"
2104
   ]
2105
  },
2106
  {
2107
   "cell_type": "code",
2108
   "execution_count": 37,
2109
   "id": "d7809e16",
2110
   "metadata": {},
2111
   "outputs": [],
2112
   "source": [
2113
    "# cv.get_feature_names()"
2114
   ]
2115
  },
2116
  {
2117
   "cell_type": "code",
2118
   "execution_count": 38,
2119
   "id": "4b71c2f2",
2120
   "metadata": {},
2121
   "outputs": [
2122
    {
2123
     "data": {
2124
      "text/plain": [
2125
       "(4806, 5000)"
2126
      ]
2127
     },
2128
     "execution_count": 38,
2129
     "metadata": {},
2130
     "output_type": "execute_result"
2131
    }
2132
   ],
2133
   "source": [
2134
    "vector.shape"
2135
   ]
2136
  },
2137
  {
2138
   "cell_type": "code",
2139
   "execution_count": 39,
2140
   "id": "12a944fa",
2141
   "metadata": {},
2142
   "outputs": [],
2143
   "source": [
2144
    "import nltk"
2145
   ]
2146
  },
2147
  {
2148
   "cell_type": "code",
2149
   "execution_count": 40,
2150
   "id": "4abe934e",
2151
   "metadata": {},
2152
   "outputs": [],
2153
   "source": [
2154
    "from nltk.stem.porter import PorterStemmer\n",
2155
    "ps= PorterStemmer()"
2156
   ]
2157
  },
2158
  {
2159
   "cell_type": "code",
2160
   "execution_count": 41,
2161
   "id": "0ff40f63",
2162
   "metadata": {},
2163
   "outputs": [],
2164
   "source": [
2165
    "def stem(text):\n",
    "    \"\"\"Return `text` with each whitespace-separated token replaced by its Porter stem.\"\"\"\n",
    "    stemmed_tokens = [ps.stem(token) for token in text.split()]\n",
    "    return \" \".join(stemmed_tokens)"
2172
   ]
2173
  },
2174
  {
2175
   "cell_type": "code",
2176
   "execution_count": 42,
2177
   "id": "b98e0748",
2178
   "metadata": {},
2179
   "outputs": [],
2180
   "source": [
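2181
    "# Stemming maps inflected forms of a word onto one stem so they count as the same token.\n",
    "# NOTE(review): `vector` already exists before this cell (it is displayed above); if it was built\n",
    "# from new['tags'], rebuild it after stemming, otherwise `similarity` ignores this step.\n",
    "new['tags'] = new['tags'].apply(stem)"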
2182
   ]
2183
  },
2184
  {
2185
   "cell_type": "code",
2186
   "execution_count": 43,
2187
   "id": "a8bab646",
2188
   "metadata": {},
2189
   "outputs": [],
2190
   "source": [
2191
    "from sklearn.metrics.pairwise import cosine_similarity \n"
2192
   ]
2193
  },
2194
  {
2195
   "cell_type": "code",
2196
   "execution_count": 44,
2197
   "id": "03d53db1",
2198
   "metadata": {},
2199
   "outputs": [],
2200
   "source": [
2201
    "similarity = cosine_similarity(vector) # cosine of the angle between every pair of row vectors; a higher value means more similar\n"
2202
   ]
2203
  },
2204
  {
2205
   "cell_type": "code",
2206
   "execution_count": 45,
2207
   "id": "289f5e53",
2208
   "metadata": {},
2209
   "outputs": [
2210
    {
2211
     "data": {
2212
      "text/plain": [
2213
       "array([[1.        , 0.08964215, 0.06071767, ..., 0.02519763, 0.0277885 ,\n",
2214
       "        0.        ],\n",
2215
       "       [0.08964215, 1.        , 0.06350006, ..., 0.02635231, 0.        ,\n",
2216
       "        0.        ],\n",
2217
       "       [0.06071767, 0.06350006, 1.        , ..., 0.02677398, 0.        ,\n",
2218
       "        0.        ],\n",
2219
       "       ...,\n",
2220
       "       [0.02519763, 0.02635231, 0.02677398, ..., 1.        , 0.07352146,\n",
2221
       "        0.04774099],\n",
2222
       "       [0.0277885 , 0.        , 0.        , ..., 0.07352146, 1.        ,\n",
2223
       "        0.05264981],\n",
2224
       "       [0.        , 0.        , 0.        , ..., 0.04774099, 0.05264981,\n",
2225
       "        1.        ]])"
2226
      ]
2227
     },
2228
     "execution_count": 45,
2229
     "metadata": {},
2230
     "output_type": "execute_result"
2231
    }
2232
   ],
2233
   "source": [
2234
    "similarity\n"
2235
   ]
2236
  },
2237
  {
2238
   "cell_type": "code",
2239
   "execution_count": 46,
2240
   "id": "878adeff",
2241
   "metadata": {},
2242
   "outputs": [
2243
    {
2244
     "data": {
2245
      "text/plain": [
2246
       "744"
2247
      ]
2248
     },
2249
     "execution_count": 46,
2250
     "metadata": {},
2251
     "output_type": "execute_result"
2252
    }
2253
   ],
2254
   "source": [
2255
    "new[new['title'] == 'The Lego Movie'].index[0]\n"
2256
   ]
2257
  },
2258
  {
2259
   "cell_type": "code",
2260
   "execution_count": 47,
2261
   "id": "00337383",
2262
   "metadata": {},
2263
   "outputs": [],
2264
   "source": [
2265
    "def recommend(movie, top_n=5):\n",
    "    \"\"\"Print the titles of the `top_n` movies most similar to `movie`.\n",
    "\n",
    "    `movie` must equal an entry of new['title'] exactly; the first match is used.\n",
    "    Raises IndexError when no row has that title.\n",
    "    \"\"\"\n",
    "    # Use the row position, not the index label: `similarity` is a NumPy array indexed\n",
    "    # positionally (as is `.iloc`), so a label is only correct while new.index is exactly 0..n-1.\n",
    "    positions = (new['title'] == movie).to_numpy().nonzero()[0]\n",
    "    if len(positions) == 0:\n",
    "        raise IndexError(f\"no movie titled {movie!r} in the movie list\")\n",
    "    position = positions[0]\n",
    "    ranked = sorted(enumerate(similarity[position]), reverse=True, key=lambda pair: pair[1])\n",
    "    # Skip the query movie by position instead of assuming it sorts first; a movie with\n",
    "    # identical tags ties with it and could otherwise push it into the results.\n",
    "    neighbours = [row for row, _ in ranked if row != position][:top_n]\n",
    "    for row in neighbours:\n",
    "        print(new['title'].iloc[row])"
2270
   ]
2271
  },
2272
  {
2273
   "cell_type": "code",
2274
   "execution_count": 48,
2275
   "id": "67047f9b",
2276
   "metadata": {},
2277
   "outputs": [
2278
    {
2279
     "name": "stdout",
2280
     "output_type": "stream",
2281
     "text": [
2282
      "The Dark Knight\n",
2283
      "The Dark Knight Rises\n",
2284
      "Batman\n",
2285
      "Batman & Robin\n",
2286
      "Batman\n"
2287
     ]
2288
    }
2289
   ],
2290
   "source": [
2291
    "recommend('Batman Begins')\n"
2292
   ]
2293
  },
2294
  {
2295
   "cell_type": "code",
2296
   "execution_count": 49,
2297
   "id": "ef4ae40b",
2298
   "metadata": {},
2299
   "outputs": [],
2300
   "source": [
2301
    "import pickle"
2302
   ]
2303
  },
2304
  {
2305
   "cell_type": "code",
2306
   "execution_count": 50,
2307
   "id": "421e14ba",
2308
   "metadata": {},
2309
   "outputs": [],
2310
   "source": [
2311
    "# Save the movie table and the similarity matrix; `with` closes each file even if dump() raises.\n",
    "with open('movie_list.pkl', 'wb') as movie_file:\n",
    "    pickle.dump(new, movie_file)\n",
    "with open('similarity.pkl', 'wb') as similarity_file:\n",
    "    pickle.dump(similarity, similarity_file)"
2313
   ]
2314
  },
2315
  {
2316
   "cell_type": "code",
2317
   "execution_count": 53,
2318
   "id": "e8c97e90",
2319
   "metadata": {},
2320
   "outputs": [
2321
    {
2322
     "name": "stdout",
2323
     "output_type": "stream",
2324
     "text": [
2325
      "Requirement already satisfied: pandas in d:\\desktop\\anaconda23\\lib\\site-packages (1.5.3)\n",
2326
      "Requirement already satisfied: numpy>=1.21.0 in d:\\desktop\\anaconda23\\lib\\site-packages (from pandas) (1.23.5)\n",
2327
      "Requirement already satisfied: python-dateutil>=2.8.1 in d:\\desktop\\anaconda23\\lib\\site-packages (from pandas) (2.8.2)\n",
2328
      "Requirement already satisfied: pytz>=2020.1 in d:\\desktop\\anaconda23\\lib\\site-packages (from pandas) (2022.7)\n",
2329
      "Requirement already satisfied: six>=1.5 in d:\\desktop\\anaconda23\\lib\\site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n"
2330
     ]
2331
    }
2332
   ],
2333
   "source": [
2334
    "!pip install pandas --version"
2335
   ]
2336
  },
2337
  {
2338
   "cell_type": "code",
2339
   "execution_count": 55,
2340
   "id": "33b2c364",
2341
   "metadata": {},
2342
   "outputs": [
2343
    {
2344
     "name": "stdout",
2345
     "output_type": "stream",
2346
     "text": [
2347
      "1.5.3\n"
2348
     ]
2349
    }
2350
   ],
2351
   "source": [
2352
    "!python -c \"import pandas as pd; print(pd.__version__)\"\n"
2353
   ]
2354
  }
2355
 ],
2356
 "metadata": {
2357
  "kernelspec": {
2358
   "display_name": "Python 3 (ipykernel)",
2359
   "language": "python",
2360
   "name": "python3"
2361
  },
2362
  "language_info": {
2363
   "codemirror_mode": {
2364
    "name": "ipython",
2365
    "version": 3
2366
   },
2367
   "file_extension": ".py",
2368
   "mimetype": "text/x-python",
2369
   "name": "python",
2370
   "nbconvert_exporter": "python",
2371
   "pygments_lexer": "ipython3",
2372
   "version": "3.10.9"
2373
  }
2374
 },
2375
 "nbformat": 4,
2376
 "nbformat_minor": 5
2377
}
2378

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.