{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyMAFA2EYfyZHWxBXYVidXcz"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["## Mengunduh dan Menyiapkan Dataset MovieLens 100k\n","Pertama, kita unduh dataset dari MovieLens atau gunakan library surprise untuk memuatnya dengan mudah."],"metadata":{"id":"XuAFN0HOVwz5"}},{"cell_type":"code","execution_count":2,"metadata":{"id":"a5vVoaGrOWqT","executionInfo":{"status":"ok","timestamp":1724049334056,"user_tz":-420,"elapsed":2310,"user":{"displayName":"Andys Collection","userId":"04951959771200949138"}}},"outputs":[],"source":["import pandas as pd\n","import numpy as np\n","from sklearn.model_selection import train_test_split"]},{"cell_type":"code","source":["# Mengunduh dataset MovieLens 100k\n","!wget -q https://files.grouplens.org/datasets/movielens/ml-100k.zip\n","!unzip -q ml-100k.zip\n","\n","# Memuat data\n","data = pd.read_csv('ml-100k/u.data', sep='\\t', names=['user_id', 'item_id', 'rating', 'timestamp'])\n","data = data[['user_id', 'item_id', 'rating']] # Kita hanya butuh user_id, item_id, dan rating\n","\n","# Membaca file item yang berisi metadata\n","genre_list = [\"unknown\", \"Action\", \"Adventure\", \"Animation\", \"Children's\", \"Comedy\", \"Crime\", \"Documentary\", \"Drama\", \"Fantasy\", \"Film-Noir\", \"Horror\", \"Musical\", \"Mystery\", \"Romance\", \"Sci-Fi\", \"Thriller\", \"War\", \"Western\"]\n","columns = [\"item_id\", \"title\", \"release_date\", \"video_release_date\", \"IMDb_URL\"] + genre_list\n","item_metadata = pd.read_csv('ml-100k/u.item', sep='|', encoding='ISO-8859-1', header=None, names=columns, usecols=range(24))\n","\n","# # Menggabungkan metadata dengan dataset utama\n","data = pd.merge(data, item_metadata, on='item_id')\n","\n","# Normalisasi ID pengguna dan item (karena ID asli mungkin tidak dimulai dari 0)\n","data['user_id'] = data['user_id'] - 1\n","data['item_id'] = data['item_id'] - 1\n","\n","# Melihat statistik dataset\n","num_users = data['user_id'].nunique()\n","num_items = data['item_id'].nunique()\n","print(f\"Number of users: {num_users}, Number of items: {num_items}\")\n","\n","# Split dataset menjadi train dan test\n","train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)\n","\n","item_metadata.head()"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":411},"id":"c1A17vhhVubf","executionInfo":{"status":"ok","timestamp":1724049863540,"user_tz":-420,"elapsed":4952,"user":{"displayName":"Andys Collection","userId":"04951959771200949138"}},"outputId":"762dffb5-22fe-4cfa-971c-fb62eeec95c8"},"execution_count":6,"outputs":[{"output_type":"stream","name":"stdout","text":["replace ml-100k/allbut.pl? [y]es, [n]o, [A]ll, [N]one, [r]ename: A\n","Number of users: 943, Number of items: 1682\n"]},{"output_type":"execute_result","data":{"text/plain":[" item_id title release_date video_release_date \\\n","0 1 Toy Story (1995) 01-Jan-1995 NaN \n","1 2 GoldenEye (1995) 01-Jan-1995 NaN \n","2 3 Four Rooms (1995) 01-Jan-1995 NaN \n","3 4 Get Shorty (1995) 01-Jan-1995 NaN \n","4 5 Copycat (1995) 01-Jan-1995 NaN \n","\n"," IMDb_URL unknown Action \\\n","0 http://us.imdb.com/M/title-exact?Toy%20Story%2... 0 0 \n","1 http://us.imdb.com/M/title-exact?GoldenEye%20(... 0 1 \n","2 http://us.imdb.com/M/title-exact?Four%20Rooms%... 0 0 \n","3 http://us.imdb.com/M/title-exact?Get%20Shorty%... 0 1 \n","4 http://us.imdb.com/M/title-exact?Copycat%20(1995) 0 0 \n","\n"," Adventure Animation Children's ... Fantasy Film-Noir Horror Musical \\\n","0 0 1 1 ... 0 0 0 0 \n","1 1 0 0 ... 0 0 0 0 \n","2 0 0 0 ... 0 0 0 0 \n","3 0 0 0 ... 0 0 0 0 \n","4 0 0 0 ... 0 0 0 0 \n","\n"," Mystery Romance Sci-Fi Thriller War Western \n","0 0 0 0 0 0 0 \n","1 0 0 0 1 0 0 \n","2 0 0 0 1 0 0 \n","3 0 0 0 0 0 0 \n","4 0 0 0 1 0 0 \n","\n","[5 rows x 24 columns]"],"text/html":["\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
item_idtitlerelease_datevideo_release_dateIMDb_URLunknownActionAdventureAnimationChildren's...FantasyFilm-NoirHorrorMusicalMysteryRomanceSci-FiThrillerWarWestern
01Toy Story (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?Toy%20Story%2...00011...0000000000
12GoldenEye (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?GoldenEye%20(...01100...0000000100
23Four Rooms (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?Four%20Rooms%...00000...0000000100
34Get Shorty (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?Get%20Shorty%...01000...0000000000
45Copycat (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?Copycat%20(1995)00000...0000000100
\n","

5 rows × 24 columns

\n","
\n","
\n","\n","
\n"," \n","\n"," \n","\n"," \n","
\n","\n","\n","
\n"," \n","\n","\n","\n"," \n","
\n","\n","
\n","
\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","variable_name":"item_metadata"}},"metadata":{},"execution_count":6}]},{"cell_type":"markdown","source":["## Membuat Model Matrix Factorization\n","Sekarang kita akan membuat model matrix factorization dengan TensorFlow atau PyTorch yang mirip dengan yang sudah kita implementasikan sebelumnya."],"metadata":{"id":"D0pwH9lFY9oc"}},{"cell_type":"code","source":["import torch\n","import torch.nn as nn\n","import torch.optim as optim\n","from torch.utils.data import DataLoader, Dataset"],"metadata":{"id":"QFeYKahGZF_I","executionInfo":{"status":"ok","timestamp":1724050080430,"user_tz":-420,"elapsed":6584,"user":{"displayName":"Andys Collection","userId":"04951959771200949138"}}},"execution_count":7,"outputs":[]},{"cell_type":"code","source":["class MovieLensDatasetWithMetadata(Dataset):\n"," def __init__(self, ratings, metadata):\n"," \"\"\"\n"," Args:\n"," ratings (DataFrame): DataFrame yang berisi kolom user_id, item_id, dan rating.\n"," metadata (ndarray): Numpy array atau tensor berisi metadata item (misalnya genre).\n"," \"\"\"\n"," self.user_ids = torch.tensor(ratings['user_id'].values, dtype=torch.long)\n"," self.item_ids = torch.tensor(ratings['item_id'].values, dtype=torch.long)\n"," self.ratings = torch.tensor(ratings['rating'].values, dtype=torch.float32)\n"," self.metadata = torch.tensor(metadata, dtype=torch.float32)\n","\n"," def __len__(self):\n"," return len(self.ratings)\n","\n"," def __getitem__(self, idx):\n"," user_id = self.user_ids[idx]\n"," item_id = self.item_ids[idx]\n"," rating = self.ratings[idx]\n"," metadata = self.metadata[item_id] # Metadata diambil berdasarkan item_id\n"," return user_id, item_id, rating, metadata"],"metadata":{"id":"7FLSZaW-a-Ro","executionInfo":{"status":"ok","timestamp":1724050569156,"user_tz":-420,"elapsed":402,"user":{"displayName":"Andys Collection","userId":"04951959771200949138"}}},"execution_count":12,"outputs":[]},{"cell_type":"code","source":["class MFModelWithMetadata(nn.Module):\n"," def __init__(self, num_users, num_items, embedding_size, metadata_size, reg_factor):\n"," super(MFModelWithMetadata, self).__init__()\n"," self.user_embedding = nn.Embedding(num_users, embedding_size)\n"," self.item_embedding = nn.Embedding(num_items, embedding_size)\n"," self.user_bias = nn.Embedding(num_users, 1)\n"," self.item_bias = nn.Embedding(num_items, 1)\n"," self.metadata_dense = nn.Linear(metadata_size, embedding_size)\n"," self.reg_factor = reg_factor\n","\n"," def forward(self, user_id, item_id, item_metadata):\n"," user_vec = self.user_embedding(user_id)\n"," item_vec = self.item_embedding(item_id)\n"," metadata_vec = torch.relu(self.metadata_dense(item_metadata))\n"," user_bias = self.user_bias(user_id).squeeze()\n"," item_bias = self.item_bias(item_id).squeeze()\n","\n"," # Menggabungkan embedding item dengan vektor metadata\n"," combined_item_vec = item_vec + metadata_vec\n","\n"," dot_product = torch.sum(user_vec * combined_item_vec, dim=1)\n"," return dot_product + user_bias + item_bias\n","\n"," def regularization_loss(self):\n"," return self.reg_factor * (torch.norm(self.user_embedding.weight) + torch.norm(self.item_embedding.weight))"],"metadata":{"id":"Vobp8x1zYeuw","executionInfo":{"status":"ok","timestamp":1724050573193,"user_tz":-420,"elapsed":5,"user":{"displayName":"Andys Collection","userId":"04951959771200949138"}}},"execution_count":13,"outputs":[]},{"cell_type":"code","source":["# Melatih model\n","metadata_input = torch.tensor(item_metadata[genre_list].values, dtype=torch.float32)\n","train_metadata = metadata_input[train_data['item_id'].values]\n","\n","train_dataset = MovieLensDatasetWithMetadata(train_data, train_metadata)\n","train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)\n","\n","model = MFModelWithMetadata(num_users, num_items, embedding_size=32, metadata_size=len(genre_list), reg_factor=0.01)\n","criterion = nn.MSELoss()\n","optimizer = optim.Adam(model.parameters(), lr=0.01)\n","\n","# Training loop\n","for epoch in range(20):\n"," model.train()\n"," total_loss = 0\n"," for user_ids, item_ids, ratings, metadata in train_loader:\n"," optimizer.zero_grad()\n"," predictions = model(user_ids, item_ids, metadata)\n"," loss = criterion(predictions, ratings)\n"," loss += model.regularization_loss() # Menambahkan regularisasi\n"," loss.backward()\n"," optimizer.step()\n"," total_loss += loss.item()\n"," print(f\"Epoch {epoch+1}, Loss: {total_loss/len(train_loader):.4f}\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"_GlxO1NTacre","executionInfo":{"status":"ok","timestamp":1724050727524,"user_tz":-420,"elapsed":51674,"user":{"displayName":"Andys Collection","userId":"04951959771200949138"}},"outputId":"bb7020d3-678d-410f-beff-6b13ef24f69e"},"execution_count":15,"outputs":[{"output_type":"stream","name":"stderr","text":[":11: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n"," self.metadata = torch.tensor(metadata, dtype=torch.float32)\n"]},{"output_type":"stream","name":"stdout","text":["Epoch 1, Loss: 30.5156\n","Epoch 2, Loss: 9.8794\n","Epoch 3, Loss: 5.5845\n","Epoch 4, Loss: 4.0761\n","Epoch 5, Loss: 3.3534\n","Epoch 6, Loss: 2.9154\n","Epoch 7, Loss: 2.6030\n","Epoch 8, Loss: 2.3503\n","Epoch 9, Loss: 2.1314\n","Epoch 10, Loss: 1.9354\n","Epoch 11, Loss: 1.7634\n","Epoch 12, Loss: 1.6138\n","Epoch 13, Loss: 1.4952\n","Epoch 14, Loss: 1.4069\n","Epoch 15, Loss: 1.3445\n","Epoch 16, Loss: 1.2983\n","Epoch 17, Loss: 1.2678\n","Epoch 18, Loss: 1.2397\n","Epoch 19, Loss: 1.2157\n","Epoch 20, Loss: 1.1865\n"]}]},{"cell_type":"markdown","source":["## Mengevaluasi Kinerja Model\n","Setelah training, kita evaluasi model pada dataset test menggunakan RMSE."],"metadata":{"id":"Wf2yPJlPbxWO"}},{"cell_type":"code","source":["from sklearn.metrics import mean_squared_error\n","import numpy as np\n","\n","model.eval()\n","test_metadata = metadata_input[test_data['item_id'].values]\n","\n","test_dataset = MovieLensDatasetWithMetadata(test_data, test_metadata)\n","test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False)\n","\n","predictions, targets = [], []\n","with torch.no_grad():\n"," for user_ids, item_ids, ratings, metadata in test_loader:\n"," output = model(user_ids, item_ids, metadata)\n"," predictions.extend(output.numpy())\n"," targets.extend(ratings.numpy())\n","\n","rmse = np.sqrt(mean_squared_error(targets, predictions))\n","print(f\"Test RMSE: {rmse:.4f}\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"nTUvdTFIalb5","executionInfo":{"status":"ok","timestamp":1724050795644,"user_tz":-420,"elapsed":989,"user":{"displayName":"Andys Collection","userId":"04951959771200949138"}},"outputId":"279cf213-0a0c-4b22-8d5a-8017b8fafda2"},"execution_count":17,"outputs":[{"output_type":"stream","name":"stderr","text":[":11: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n"," self.metadata = torch.tensor(metadata, dtype=torch.float32)\n"]},{"output_type":"stream","name":"stdout","text":["Test RMSE: 0.9595\n"]}]},{"cell_type":"markdown","source":["## Melakukan Top-N Recommendation Untuk Pengguna Tertentu\n","Untuk menampilkan Top-N prediksi untuk pengguna tertentu, kamu bisa mengambil semua item yang belum dirating oleh pengguna tersebut dan menghitung prediksinya. Setelah itu, kamu bisa mengurutkannya berdasarkan nilai prediksi dari yang tertinggi.\n","\n","Penjelasan:\n","- Memfilter Item yang Belum Dirating: Untuk setiap pengguna, kita ambil semua item yang belum pernah mereka beri rating. Ini penting karena rekomendasi seharusnya diberikan untuk item yang belum pernah dilihat atau dinilai.\n","- Prediksi Semua Rating: Untuk setiap item yang belum dirating, kita prediksi rating menggunakan model yang telah dilatih.\n","- Urutkan Prediksi: Setelah semua prediksi dihitung, kita urutkan dari yang tertinggi ke terendah dan ambil N prediksi teratas.\n","- Mengembalikan Item yang Direkomendasikan: Output adalah daftar item (misalnya film) yang direkomendasikan untuk pengguna tersebut."],"metadata":{"id":"ONQGz8OKb_aM"}},{"cell_type":"code","source":["def get_top_n_recommendations_pytorch(model, user_id, N=10):\n"," # Dapatkan semua item yang tersedia\n"," all_items = np.array(range(num_items))\n","\n"," # Cek item yang sudah dirating oleh user\n"," rated_items = train_data[train_data['user_id'] == user_id]['item_id'].values\n","\n"," # Ambil item yang belum dirating oleh user\n"," items_to_predict = np.setdiff1d(all_items, rated_items)\n","\n"," # Dapatkan metadata dari item yang akan diprediksi\n"," items_metadata = metadata_input[items_to_predict]\n","\n"," model.eval()\n"," with torch.no_grad():\n"," user_ids = torch.tensor([user_id] * len(items_to_predict))\n"," item_ids = torch.tensor(items_to_predict)\n"," metadata = torch.tensor(items_metadata, dtype=torch.float32)\n"," predicted_ratings = model(user_ids, item_ids, metadata).numpy()\n","\n"," # Urutkan item berdasarkan rating tertinggi\n"," top_n_items = items_to_predict[np.argsort(predicted_ratings)[-N:][::-1]]\n","\n"," return top_n_items\n","\n","# Contoh penggunaan\n","user_id = 0\n","top_n_recommendations = get_top_n_recommendations_pytorch(model, user_id, N=10)\n","print(f\"Top 10 recommended items for user {user_id}: {top_n_recommendations}\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"O6UMD30pb8KI","executionInfo":{"status":"ok","timestamp":1724050857455,"user_tz":-420,"elapsed":539,"user":{"displayName":"Andys Collection","userId":"04951959771200949138"}},"outputId":"a71e90ab-0fc0-466d-8b41-9637809f963a"},"execution_count":18,"outputs":[{"output_type":"stream","name":"stdout","text":["Top 10 recommended items for user 0: [ 813 1448 856 1466 1499 849 1650 407 171 312]\n"]},{"output_type":"stream","name":"stderr","text":[":18: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n"," metadata = torch.tensor(items_metadata, dtype=torch.float32)\n"]}]},{"cell_type":"markdown","source":["- Metadata dalam Evaluasi: Saat mengevaluasi model, kita sertakan metadata dari item yang ada di test set.\n","- Prediksi Top-N: Metadata juga disertakan dalam proses prediksi untuk semua item yang belum dirating oleh pengguna, lalu kita ambil N item dengan prediksi tertinggi."],"metadata":{"id":"7LeYwz1QcxWH"}}]}