MK-25source · Pranjali2702 · Apr 8, 2026
diff --git a/510_23BAI10050.py b/510_23BAI10050.py
@@ -0,0 +1,211 @@
+# -*- coding: utf-8 -*-
+"""reccomendation_rl.ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/11oa4Qdh7gurKVN6aAQ2XWskUW7YxLurJ
+"""
+
+import os
+print(os.listdir())
+
+import pandas as pd
+
+# Both CSV files are read from the current working directory.
+ratings = pd.read_csv('ratings.csv')
+
+movies = pd.read_csv(
+    'movies_metadata.csv',
+    engine='python',        # python parser; presumably the default C parser fails on this file - TODO confirm
+    on_bad_lines='skip'     # skip broken rows
+)
+
+print("Loaded successfully ✅")
+
+movies = movies[['id', 'title']]
+
+# Remove invalid IDs: keep only rows whose id is made of digits.
+# The explicit .copy() gives an independent frame, so the column assignment
+# below does not write into a filtered slice (which can raise
+# SettingWithCopyWarning on pandas versions without copy-on-write).
+movies = movies[movies['id'].astype(str).str.isnumeric()].copy()
+movies['id'] = movies['id'].astype(int)
+
+# Attach titles to ratings; pd.merge defaults to an inner join, so ratings
+# without a matching movie id are dropped.
+df = pd.merge(ratings, movies, left_on='movieId', right_on='id')
+
+df = df.head(20000)  # keep it small for now
+
+print(df.head())
+
+print(movies.shape)
+print(ratings.shape)
+
+import numpy as np
+import random
+
+# Distinct user ids and movie ids that appear in the merged sample.
+users = df['userId'].unique()
+movies_list = df['movieId'].unique()
+
+# id -> position in the arrays above; the positions double as the
+# row (user) and column (movie) indices of the Q-table.
+user_to_index = dict(zip(users, range(len(users))))
+movie_to_index = dict(zip(movies_list, range(len(movies_list))))
+
+# One row per user, one column per movie, every estimate starts at 0.
+q_table = np.zeros(shape=(len(users), len(movies_list)))
+
+print("Users:", len(users))
+print("Movies:", len(movies_list))
+
+def get_reward(user, movie):
+    """Return the reward for recommending ``movie`` to ``user``.
+
+    Looks up the rows of the module-level ``df`` for this (user, movie)
+    pair. If at least one row exists, the mean of the matching ratings is
+    rescaled with ``(rating - 2.5) / 2.5``, which maps 0 to -1 and 5 to +1
+    (assumes ratings lie on a 0-5 scale - TODO confirm). If no row exists,
+    a fixed penalty of -0.05 is returned.
+
+    Args:
+        user: value compared against ``df['userId']``.
+        movie: value compared against ``df['movieId']``.
+
+    Returns:
+        float: the rescaled rating, or -0.05 for an unrated pair.
+    """
+    matches = df.loc[(df['userId'] == user) & (df['movieId'] == movie), 'rating']
+
+    if matches.empty:
+        return -0.05   # small penalty for unknown
+
+    # A pair may match several rows (repeated ratings or rows duplicated by
+    # the merge). float(Series) raises TypeError unless the Series has exactly
+    # one element, so reduce to a scalar first; for a single match the mean
+    # is that rating itself.
+    rating = float(matches.mean())
+    return (rating - 2.5) / 2.5
+
+# Hyper-parameters of the tabular update below.
+alpha = 0.1     # step size of each update
+gamma = 0.9     # weight of the bootstrap term
+epsilon = 0.3   # more exploration
+
+episodes = 1000
+
+for ep in range(episodes):
+    # Each episode updates one (user, movie) cell for a randomly drawn user.
+    user = random.choice(users)
+    user_idx = user_to_index[user]
+
+    # epsilon-greedy: exploit the current best column unless the draw
+    # falls below epsilon, in which case pick a random column.
+    if random.uniform(0, 1) >= epsilon:
+        movie_idx = q_table[user_idx].argmax()
+    else:
+        movie_idx = random.randint(0, len(movies_list)-1)
+
+    movie = movies_list[movie_idx]
+    reward = get_reward(user, movie)
+
+    # Bootstrap value: the best estimate in this same user's row, read
+    # before the cell is updated.
+    best_next = q_table[user_idx].max()
+
+    # Q update
+    q_table[user_idx, movie_idx] += alpha * (
+        reward + gamma * best_next - q_table[user_idx, movie_idx]
+    )
+
+    # reduce exploration slowly, never below 0.05
+    epsilon = max(0.05, 0.995 * epsilon)
+
+print("Training Done ✅")
+
+def recommend(user_id, top_k=5):
+    """Return up to ``top_k`` movie titles with the highest Q-values for a user.
+
+    Args:
+        user_id: user id as it appears in ``df['userId']``.
+        top_k: maximum number of titles to return.
+
+    Returns:
+        list: titles ordered from highest to lowest Q-value. For a user that
+        is not in ``user_to_index`` the single-element list
+        ``["New user - no data"]`` is returned. A non-positive ``top_k``
+        yields an empty list.
+    """
+    if user_id not in user_to_index:
+        return ["New user - no data"]
+
+    # A slice of [-0:] would select every movie (and a negative top_k would
+    # select all but the lowest few), so handle non-positive sizes explicitly.
+    if top_k <= 0:
+        return []
+
+    scores = q_table[user_to_index[user_id]]
+
+    # argsort is ascending: keep the last top_k positions, best first.
+    best_first = np.argsort(scores)[-top_k:][::-1]
+
+    rec_movies = []
+    for i in best_first:
+        titles = df.loc[df['movieId'] == movies_list[i], 'title']
+        if not titles.empty:
+            rec_movies.append(titles.iloc[0])
+
+    return rec_movies
+
+# Demo: print the recommendations for user id 1.
+print("Recommendations for User 1:")
+print(recommend(1))
+
+# Write the current Q-table to q_table.npy in the working directory.
+# NOTE(review): the loop further down keeps updating q_table after this
+# point, so the saved file does not include those later updates.
+np.save("q_table.npy", q_table)
+
+def show_user_history(user_id):
+    """Return title and rating of the user's five highest-rated rows in ``df``."""
+    own_rows = df[df['userId'] == user_id]
+    best_first = own_rows.sort_values(by='rating', ascending=False)
+    return best_first.head(5)[['title', 'rating']]
+
+print(show_user_history(1))
+
+# Second pass over the same update rule, this time recording the reward of
+# every episode. epsilon is not decayed here; it keeps the value left by
+# the earlier loop.
+rewards_log = []
+
+for ep in range(episodes):
+    user = random.choice(users)
+    user_idx = user_to_index[user]
+
+    # epsilon-greedy choice of the movie column
+    movie_idx = (
+        random.randint(0, len(movies_list)-1)
+        if random.uniform(0, 1) < epsilon
+        else np.argmax(q_table[user_idx])
+    )
+
+    movie = movies_list[movie_idx]
+    reward = get_reward(user, movie)
+    rewards_log.append(reward)
+
+    # Best estimate in the user's row, read before the cell is updated.
+    best_next = q_table[user_idx].max()
+
+    q_table[user_idx, movie_idx] += alpha * (
+        reward + gamma * best_next - q_table[user_idx, movie_idx]
+    )
+
+import matplotlib.pyplot as plt
+
+# Plot the per-episode reward in the order it was collected.
+plt.plot(rewards_log)
+plt.title("Reward over Time")
+plt.xlabel("Episodes")
+plt.ylabel("Reward")
+plt.show()
+
+def recommend_with_scores(user_id, top_k=5):
+    """Return the user's ``top_k`` best movies together with their Q-values.
+
+    Args:
+        user_id: user id as it appears in ``df['userId']``.
+        top_k: maximum number of entries to return.
+
+    Returns:
+        list: ``(title, score)`` tuples ordered from highest to lowest
+        score. The list is empty when the user is not in ``user_to_index``
+        or ``top_k`` is not positive.
+    """
+    # recommend() and recommend_clean() guard against unknown users; do the
+    # same here instead of failing with KeyError. The top_k check avoids the
+    # [-0:] slice, which would select every movie.
+    if user_id not in user_to_index or top_k <= 0:
+        return []
+
+    scores = q_table[user_to_index[user_id]]
+
+    # argsort is ascending: keep the last top_k positions, best first.
+    best_first = np.argsort(scores)[-top_k:][::-1]
+
+    result = []
+    for i in best_first:
+        titles = df.loc[df['movieId'] == movies_list[i], 'title']
+        result.append((titles.iloc[0], scores[i]))
+
+    return result
+
+print(recommend_with_scores(1))
+
+def popular_movies(top_k=5):
+    """Return the ``top_k`` titles with the highest mean rating in ``df``."""
+    mean_by_title = df.groupby('title')['rating'].mean()
+    ranked = mean_by_title.sort_values(ascending=False)
+    return ranked.head(top_k)
+
+print(popular_movies())
+
+def recommend_clean(user_id, top_k=5):
+    """Return up to ``top_k`` titles the user has no rating row for, best first.
+
+    Movies are visited in descending order of the user's Q-values; any movie
+    id that appears in the user's rows of ``df`` is skipped.
+
+    Args:
+        user_id: user id as it appears in ``df['userId']``.
+        top_k: maximum number of titles to return.
+
+    Returns:
+        list: movie titles. For a user that is not in ``user_to_index`` the
+        single-element list ``["New user"]`` is returned. A non-positive
+        ``top_k`` yields an empty list.
+    """
+    if user_id not in user_to_index:
+        return ["New user"]
+
+    # Without this guard top_k=0 returned either nothing or every unwatched
+    # movie, depending on whether the first candidate was already watched.
+    if top_k <= 0:
+        return []
+
+    scores = q_table[user_to_index[user_id]]
+    watched = set(df.loc[df['userId'] == user_id, 'movieId'])
+
+    rec_movies = []
+    for i in np.argsort(scores)[::-1]:
+        movie_id = movies_list[i]
+        if movie_id in watched:
+            continue
+
+        rec_movies.append(df.loc[df['movieId'] == movie_id, 'title'].iloc[0])
+        if len(rec_movies) == top_k:
+            break
+
+    return rec_movies
+
+print(recommend_clean(1))
+
+def show_output(user_id):
+    """Print the user's top-rated history, then the filtered recommendations."""
+    sections = (
+        ("🎬 User's Favorite Movies:", show_user_history),
+        ("\n🤖 Recommended Movies:", recommend_clean),
+    )
+    # Each heading is printed before its content is computed.
+    for heading, producer in sections:
+        print(heading)
+        print(producer(user_id))
+
+show_output(1)