{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.14","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"None","dataSources":[{"sourceType":"datasetVersion","sourceId":9375511,"datasetId":3816617,"databundleVersionId":9575818}],"dockerImageVersionId":30761,"isInternetEnabled":True,"language":"python","sourceType":"notebook","isGpuEnabled":False}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"# This Python 3 environment comes with many helpful analytics libraries installed\n# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n# For example, here's several helpful packages to load\n\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n\n# Input data files are available in the read-only \"../input/\" directory\n# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n\nimport os\nfor dirname, _, filenames in os.walk('/kaggle/input'):\n for filename in filenames:\n print(os.path.join(dirname, filename))\n\n# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current 
session","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","execution":{"iopub.status.busy":"2024-09-12T13:59:21.117080Z","iopub.execute_input":"2024-09-12T13:59:21.118163Z","iopub.status.idle":"2024-09-12T13:59:21.575338Z","shell.execute_reply.started":"2024-09-12T13:59:21.118087Z","shell.execute_reply":"2024-09-12T13:59:21.574006Z"},"trusted":True},"execution_count":1,"outputs":[{"name":"stdout","text":"/kaggle/input/tmdb-movies-dataset-2023-930k-movies/TMDB_movie_dataset_v11.csv\n","output_type":"stream"}]},{"cell_type":"code","source":"import pandas as pd\nimport numpy as np\nimport matplotlib as plt\nimport seaborn as sns","metadata":{"execution":{"iopub.status.busy":"2024-09-12T13:59:21.577852Z","iopub.execute_input":"2024-09-12T13:59:21.578449Z","iopub.status.idle":"2024-09-12T13:59:23.377528Z","shell.execute_reply.started":"2024-09-12T13:59:21.578400Z","shell.execute_reply":"2024-09-12T13:59:23.376274Z"},"trusted":True},"execution_count":2,"outputs":[]},{"cell_type":"code","source":"main_df = pd.read_csv('/kaggle/input/tmdb-movies-dataset-2023-930k-movies/TMDB_movie_dataset_v11.csv')","metadata":{"execution":{"iopub.status.busy":"2024-09-12T13:59:23.379255Z","iopub.execute_input":"2024-09-12T13:59:23.380173Z","iopub.status.idle":"2024-09-12T13:59:43.297257Z","shell.execute_reply.started":"2024-09-12T13:59:23.380095Z","shell.execute_reply":"2024-09-12T13:59:43.295964Z"},"trusted":True},"execution_count":3,"outputs":[]},{"cell_type":"code","source":"df = 
main_df[main_df['vote_average']!=0]","metadata":{"execution":{"iopub.status.busy":"2024-09-12T13:59:43.299813Z","iopub.execute_input":"2024-09-12T13:59:43.300233Z","iopub.status.idle":"2024-09-12T13:59:43.410857Z","shell.execute_reply.started":"2024-09-12T13:59:43.300192Z","shell.execute_reply":"2024-09-12T13:59:43.409226Z"},"trusted":True},"execution_count":4,"outputs":[]},{"cell_type":"code","source":"df.reset_index(inplace=True)","metadata":{"execution":{"iopub.status.busy":"2024-09-12T13:59:43.412601Z","iopub.execute_input":"2024-09-12T13:59:43.413707Z","iopub.status.idle":"2024-09-12T13:59:43.422433Z","shell.execute_reply.started":"2024-09-12T13:59:43.413646Z","shell.execute_reply":"2024-09-12T13:59:43.421328Z"},"trusted":True},"execution_count":5,"outputs":[]},{"cell_type":"code","source":"df.shape","metadata":{"execution":{"iopub.status.busy":"2024-09-12T13:59:43.423990Z","iopub.execute_input":"2024-09-12T13:59:43.424908Z","iopub.status.idle":"2024-09-12T13:59:43.437036Z","shell.execute_reply.started":"2024-09-12T13:59:43.424856Z","shell.execute_reply":"2024-09-12T13:59:43.435582Z"},"trusted":True},"execution_count":6,"outputs":[{"execution_count":6,"output_type":"execute_result","data":{"text/plain":"(349038, 25)"},"metadata":{}}]},{"cell_type":"markdown","source":"# Feature Selection","metadata":{}},{"cell_type":"code","source":"df.columns","metadata":{"execution":{"iopub.status.busy":"2024-09-12T13:59:43.438401Z","iopub.execute_input":"2024-09-12T13:59:43.438777Z","iopub.status.idle":"2024-09-12T13:59:43.453587Z","shell.execute_reply.started":"2024-09-12T13:59:43.438739Z","shell.execute_reply":"2024-09-12T13:59:43.452347Z"},"trusted":True},"execution_count":7,"outputs":[{"execution_count":7,"output_type":"execute_result","data":{"text/plain":"Index(['index', 'id', 'title', 'vote_average', 'vote_count', 'status',\n 'release_date', 'revenue', 'runtime', 'adult', 'backdrop_path',\n 'budget', 'homepage', 'imdb_id', 'original_language', 'original_title',\n 
'overview', 'popularity', 'poster_path', 'tagline', 'genres',\n 'production_companies', 'production_countries', 'spoken_languages',\n 'keywords'],\n dtype='object')"},"metadata":{}}]},{"cell_type":"code","source":"df = df.drop( ['id' , 'vote_count' , 'status' , 'release_date', 'revenue' , 'backdrop_path',\n 'budget','homepage','imdb_id','original_title' , 'overview','poster_path',\n 'tagline' , 'production_companies','production_countries' ,'spoken_languages' ,'keywords'], axis=1)","metadata":{"execution":{"iopub.status.busy":"2024-09-12T13:59:43.455428Z","iopub.execute_input":"2024-09-12T13:59:43.455860Z","iopub.status.idle":"2024-09-12T13:59:43.514130Z","shell.execute_reply.started":"2024-09-12T13:59:43.455797Z","shell.execute_reply":"2024-09-12T13:59:43.513049Z"},"trusted":True},"execution_count":8,"outputs":[]},{"cell_type":"code","source":"df['org_title']=df['title']","metadata":{"execution":{"iopub.status.busy":"2024-09-12T13:59:43.515749Z","iopub.execute_input":"2024-09-12T13:59:43.516161Z","iopub.status.idle":"2024-09-12T13:59:43.526800Z","shell.execute_reply.started":"2024-09-12T13:59:43.516098Z","shell.execute_reply":"2024-09-12T13:59:43.525164Z"},"trusted":True},"execution_count":9,"outputs":[]},{"cell_type":"code","source":"df.isna().sum()","metadata":{"execution":{"iopub.status.busy":"2024-09-12T13:59:43.531013Z","iopub.execute_input":"2024-09-12T13:59:43.531509Z","iopub.status.idle":"2024-09-12T13:59:43.616119Z","shell.execute_reply.started":"2024-09-12T13:59:43.531459Z","shell.execute_reply":"2024-09-12T13:59:43.614893Z"},"trusted":True},"execution_count":10,"outputs":[{"execution_count":10,"output_type":"execute_result","data":{"text/plain":"index 0\ntitle 0\nvote_average 0\nruntime 0\nadult 0\noriginal_language 0\npopularity 0\ngenres 58964\norg_title 0\ndtype: int64"},"metadata":{}}]},{"cell_type":"code","source":"df['genres'] = 
df['genres'].fillna('unknown')","metadata":{"execution":{"iopub.status.busy":"2024-09-12T13:59:43.617572Z","iopub.execute_input":"2024-09-12T13:59:43.618503Z","iopub.status.idle":"2024-09-12T13:59:43.655645Z","shell.execute_reply.started":"2024-09-12T13:59:43.618448Z","shell.execute_reply":"2024-09-12T13:59:43.654221Z"},"trusted":True},"execution_count":11,"outputs":[]},{"cell_type":"code","source":"df.isna().sum()","metadata":{"execution":{"iopub.status.busy":"2024-09-12T13:59:43.657433Z","iopub.execute_input":"2024-09-12T13:59:43.657823Z","iopub.status.idle":"2024-09-12T13:59:43.737939Z","shell.execute_reply.started":"2024-09-12T13:59:43.657784Z","shell.execute_reply":"2024-09-12T13:59:43.736867Z"},"trusted":True},"execution_count":12,"outputs":[{"execution_count":12,"output_type":"execute_result","data":{"text/plain":"index 0\ntitle 0\nvote_average 0\nruntime 0\nadult 0\noriginal_language 0\npopularity 0\ngenres 0\norg_title 0\ndtype: int64"},"metadata":{}}]},{"cell_type":"code","source":"df.duplicated().sum()","metadata":{"execution":{"iopub.status.busy":"2024-09-12T13:59:43.739671Z","iopub.execute_input":"2024-09-12T13:59:43.740036Z","iopub.status.idle":"2024-09-12T13:59:44.069972Z","shell.execute_reply.started":"2024-09-12T13:59:43.739998Z","shell.execute_reply":"2024-09-12T13:59:44.068676Z"},"trusted":True},"execution_count":13,"outputs":[{"execution_count":13,"output_type":"execute_result","data":{"text/plain":"0"},"metadata":{}}]},{"cell_type":"code","source":"df = 
df.drop_duplicates()","metadata":{"execution":{"iopub.status.busy":"2024-09-12T13:59:44.071350Z","iopub.execute_input":"2024-09-12T13:59:44.071959Z","iopub.status.idle":"2024-09-12T13:59:44.440769Z","shell.execute_reply.started":"2024-09-12T13:59:44.071738Z","shell.execute_reply":"2024-09-12T13:59:44.439682Z"},"trusted":True},"execution_count":14,"outputs":[]},{"cell_type":"code","source":"df.duplicated().sum()","metadata":{"execution":{"iopub.status.busy":"2024-09-12T13:59:44.442433Z","iopub.execute_input":"2024-09-12T13:59:44.442910Z","iopub.status.idle":"2024-09-12T13:59:44.760085Z","shell.execute_reply.started":"2024-09-12T13:59:44.442868Z","shell.execute_reply":"2024-09-12T13:59:44.758695Z"},"trusted":True},"execution_count":15,"outputs":[{"execution_count":15,"output_type":"execute_result","data":{"text/plain":"0"},"metadata":{}}]},{"cell_type":"code","source":"dff= df.copy()","metadata":{"execution":{"iopub.status.busy":"2024-09-12T13:59:44.761498Z","iopub.execute_input":"2024-09-12T13:59:44.761964Z","iopub.status.idle":"2024-09-12T13:59:44.781993Z","shell.execute_reply.started":"2024-09-12T13:59:44.761916Z","shell.execute_reply":"2024-09-12T13:59:44.780368Z"},"trusted":True},"execution_count":16,"outputs":[]},{"cell_type":"markdown","source":"# MultiLabel Encoder","metadata":{}},{"cell_type":"code","source":"from sklearn.preprocessing import OneHotEncoder, MultiLabelBinarizer\n\ngenre_l = dff['genres'].apply(lambda x: x.split(','))\ngenre_l = 
pd.DataFrame(genre_l)","metadata":{"execution":{"iopub.status.busy":"2024-09-12T13:59:44.783600Z","iopub.execute_input":"2024-09-12T13:59:44.783977Z","iopub.status.idle":"2024-09-12T13:59:45.404819Z","shell.execute_reply.started":"2024-09-12T13:59:44.783938Z","shell.execute_reply":"2024-09-12T13:59:45.403404Z"},"trusted":True},"execution_count":17,"outputs":[]},{"cell_type":"code","source":"genre_l","metadata":{"execution":{"iopub.status.busy":"2024-09-12T13:59:45.406505Z","iopub.execute_input":"2024-09-12T13:59:45.407007Z","iopub.status.idle":"2024-09-12T13:59:45.426034Z","shell.execute_reply.started":"2024-09-12T13:59:45.406952Z","shell.execute_reply":"2024-09-12T13:59:45.424721Z"},"trusted":True},"execution_count":18,"outputs":[{"execution_count":18,"output_type":"execute_result","data":{"text/plain":" genres\n0 [Action, Science Fiction, Adventure]\n1 [Adventure, Drama, Science Fiction]\n2 [Drama, Action, Crime, Thriller]\n3 [Action, Adventure, Fantasy, Science Fiction]\n4 [Science Fiction, Action, Adventure]\n... ...\n349033 [Animation, Comedy]\n349034 [unknown]\n349035 [Documentary]\n349036 [unknown]\n349037 [Science Fiction]\n\n[349038 rows x 1 columns]","text/html":"
\n | genres | \n
---|---|
0 | \n[Action, Science Fiction, Adventure] | \n
1 | \n[Adventure, Drama, Science Fiction] | \n
2 | \n[Drama, Action, Crime, Thriller] | \n
3 | \n[Action, Adventure, Fantasy, Science Fiction] | \n
4 | \n[Science Fiction, Action, Adventure] | \n
... | \n... | \n
349033 | \n[Animation, Comedy] | \n
349034 | \n[unknown] | \n
349035 | \n[Documentary] | \n
349036 | \n[unknown] | \n
349037 | \n[Science Fiction] | \n
349038 rows × 1 columns
\n\n | level_0 | \ntitle | \nvote_average | \nruntime | \nadult | \noriginal_language | \npopularity | \norg_title | \naction | \nadventure | \n... | \nhorror | \nmusic | \nmystery | \nromance | \nsciencefiction | \nthriller | \ntvmovie | \nunknown | \nwar | \nwestern | \n
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n0 | \nInception | \n8.364 | \n148 | \nFalse | \nen | \n83.952 | \nInception | \n1 | \n1 | \n... | \n0 | \n0 | \n0 | \n0 | \n1 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
1 | \n1 | \nInterstellar | \n8.417 | \n169 | \nFalse | \nen | \n140.241 | \nInterstellar | \n0 | \n1 | \n... | \n0 | \n0 | \n0 | \n0 | \n1 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
2 | \n2 | \nThe Dark Knight | \n8.512 | \n152 | \nFalse | \nen | \n130.643 | \nThe Dark Knight | \n1 | \n0 | \n... | \n0 | \n0 | \n0 | \n0 | \n0 | \n1 | \n0 | \n0 | \n0 | \n0 | \n
3 | \n3 | \nAvatar | \n7.573 | \n162 | \nFalse | \nen | \n79.932 | \nAvatar | \n1 | \n1 | \n... | \n0 | \n0 | \n0 | \n0 | \n1 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
4 | \n4 | \nThe Avengers | \n7.710 | \n143 | \nFalse | \nen | \n98.082 | \nThe Avengers | \n1 | \n1 | \n... | \n0 | \n0 | \n0 | \n0 | \n1 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
5 rows × 28 columns
\n\n | level_0 | \ntitle | \nvote_average | \nruntime | \nadult | \noriginal_language | \npopularity | \norg_title | \naction | \nadventure | \n... | \nhorror | \nmusic | \nmystery | \nromance | \nsciencefiction | \nthriller | \ntvmovie | \nunknown | \nwar | \nwestern | \n
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n0 | \ninception | \n8.364 | \n148 | \nFalse | \nen | \n83.952 | \nInception | \n1 | \n1 | \n... | \n0 | \n0 | \n0 | \n0 | \n1 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
1 | \n1 | \ninterstellar | \n8.417 | \n169 | \nFalse | \nen | \n140.241 | \nInterstellar | \n0 | \n1 | \n... | \n0 | \n0 | \n0 | \n0 | \n1 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
2 | \n2 | \nthedarkknight | \n8.512 | \n152 | \nFalse | \nen | \n130.643 | \nThe Dark Knight | \n1 | \n0 | \n... | \n0 | \n0 | \n0 | \n0 | \n0 | \n1 | \n0 | \n0 | \n0 | \n0 | \n
3 | \n3 | \navatar | \n7.573 | \n162 | \nFalse | \nen | \n79.932 | \nAvatar | \n1 | \n1 | \n... | \n0 | \n0 | \n0 | \n0 | \n1 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
4 | \n4 | \ntheavengers | \n7.710 | \n143 | \nFalse | \nen | \n98.082 | \nThe Avengers | \n1 | \n1 | \n... | \n0 | \n0 | \n0 | \n0 | \n1 | \n0 | \n0 | \n0 | \n0 | \n0 | \n
5 rows × 28 columns
\n\n | level_0 | \ntitle | \nvote_average | \nruntime | \npopularity | \norg_title | \naction | \nadventure | \nanimation | \ncomedy | \n... | \nunknown | \nwar | \nwestern | \nadult_False | \noriginal_language_de | \noriginal_language_else | \noriginal_language_en | \noriginal_language_es | \noriginal_language_fr | \noriginal_language_ja | \n
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n0 | \ninception | \n8.364 | \n148 | \n83.952 | \nInception | \n1 | \n1 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n1.0 | \n0.0 | \n0.0 | \n1.0 | \n0.0 | \n0.0 | \n0.0 | \n
1 | \n1 | \ninterstellar | \n8.417 | \n169 | \n140.241 | \nInterstellar | \n0 | \n1 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n1.0 | \n0.0 | \n0.0 | \n1.0 | \n0.0 | \n0.0 | \n0.0 | \n
2 | \n2 | \nthedarkknight | \n8.512 | \n152 | \n130.643 | \nThe Dark Knight | \n1 | \n0 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n1.0 | \n0.0 | \n0.0 | \n1.0 | \n0.0 | \n0.0 | \n0.0 | \n
3 | \n3 | \navatar | \n7.573 | \n162 | \n79.932 | \nAvatar | \n1 | \n1 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n1.0 | \n0.0 | \n0.0 | \n1.0 | \n0.0 | \n0.0 | \n0.0 | \n
4 | \n4 | \ntheavengers | \n7.710 | \n143 | \n98.082 | \nThe Avengers | \n1 | \n1 | \n0 | \n0 | \n... | \n0 | \n0 | \n0 | \n1.0 | \n0.0 | \n0.0 | \n1.0 | \n0.0 | \n0.0 | \n0.0 | \n
5 rows × 33 columns
\n\n | title | \norg_title | \nlevel_0 | \nvote_average | \nruntime | \npopularity | \naction | \nadventure | \nanimation | \ncomedy | \n... | \nunknown | \nwar | \nwestern | \nadult_False | \noriginal_language_de | \noriginal_language_else | \noriginal_language_en | \noriginal_language_es | \noriginal_language_fr | \noriginal_language_ja | \n
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \ninception | \nInception | \n-1.732046 | \n1.156745 | \n1.193328 | \n6.055067 | \n3.468377 | \n4.804326 | \n-0.254952 | \n-0.524863 | \n... | \n-0.450857 | \n-0.132835 | \n-0.114207 | \n0.243481 | \n-0.21469 | \n-0.58974 | \n0.962307 | \n-0.273054 | \n-0.274818 | \n-0.201679 | \n
1 | \ninterstellar | \nInterstellar | \n-1.732036 | \n1.183750 | \n1.516277 | \n10.245121 | \n-0.288319 | \n4.804326 | \n-0.254952 | \n-0.524863 | \n... | \n-0.450857 | \n-0.132835 | \n-0.114207 | \n0.243481 | \n-0.21469 | \n-0.58974 | \n0.962307 | \n-0.273054 | \n-0.274818 | \n-0.201679 | \n
2 | \nthedarkknight | \nThe Dark Knight | \n-1.732026 | \n1.232154 | \n1.254842 | \n9.530663 | \n3.468377 | \n-0.208146 | \n-0.254952 | \n-0.524863 | \n... | \n-0.450857 | \n-0.132835 | \n-0.114207 | \n0.243481 | \n-0.21469 | \n-0.58974 | \n0.962307 | \n-0.273054 | \n-0.274818 | \n-0.201679 | \n
3 | \navatar | \nAvatar | \n-1.732016 | \n0.753715 | \n1.408628 | \n5.755825 | \n3.468377 | \n4.804326 | \n-0.254952 | \n-0.524863 | \n... | \n-0.450857 | \n-0.132835 | \n-0.114207 | \n0.243481 | \n-0.21469 | \n-0.58974 | \n0.962307 | \n-0.273054 | \n-0.274818 | \n-0.201679 | \n
4 | \ntheavengers | \nThe Avengers | \n-1.732006 | \n0.823519 | \n1.116435 | \n7.106879 | \n3.468377 | \n4.804326 | \n-0.254952 | \n-0.524863 | \n... | \n-0.450857 | \n-0.132835 | \n-0.114207 | \n0.243481 | \n-0.21469 | \n-0.58974 | \n0.962307 | \n-0.273054 | \n-0.274818 | \n-0.201679 | \n
5 rows × 33 columns
\norg_title | \nInception | \nInterstellar | \nAvatar | \nThe Avengers | \nDeadpool | \nAvengers: Infinity War | \nFight Club | \nGuardians of the Galaxy | \nPulp Fiction | \nForrest Gump | \n... | \nLa solapa | \nLonely Laura and Her Sister Niamh | \nIdol × Warrior: Miracle Tunes! Pilot | \nOceans Apart: Greed, Betrayal and Pacific Island Rugby | \nDangerously Delicious: Paid Advertisement | \nOne Foot Crane | \nTriomf | \nTom Goes to the Mayor Returns | \nDHONG | \nMore Dirty Debutantes 35 | \n
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
thedarkknight | \n0.591092 | \n0.674085 | \n0.503908 | \n0.631122 | \n0.628958 | \n0.733181 | \n0.778691 | \n0.352667 | \n0.887382 | \n0.7002 | \n... | \n-0.139698 | \n-0.08233 | \n-0.095234 | \n-0.128858 | \n-0.157923 | \n0.087291 | \n-0.14619 | \n-0.155169 | \n-0.171874 | \n-0.073789 | \n
1 rows × 306945 columns
\n