Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
211 changes: 211 additions & 0 deletions 510_23BAI10050.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
# -*- coding: utf-8 -*-
"""reccomendation_rl.ipynb

Automatically generated by Colab.

Original file is located at
https://colab.research.google.com/drive/11oa4Qdh7gurKVN6aAQ2XWskUW7YxLurJ
"""

import os
print(os.listdir())

import pandas as pd

# Ratings and movie metadata are read from CSV files in the working directory.
ratings = pd.read_csv('ratings.csv')

movies = pd.read_csv(
    'movies_metadata.csv',
    engine='python',  # python parsing engine (flagged as important in the original notebook)
    on_bad_lines='skip'  # skip broken rows
)

print("Loaded successfully ✅")

# Only the id and title columns are used downstream.
movies = movies[['id', 'title']]

# Remove invalid IDs. The explicit .copy() makes the filtered frame
# independent of its parent, so the dtype assignment on the next line is a
# plain column write rather than a write into a slice (which makes pandas
# emit SettingWithCopyWarning).
movies = movies[movies['id'].astype(str).str.isnumeric()].copy()
movies['id'] = movies['id'].astype(int)

# pd.merge defaults to an inner join: only ratings whose movieId matches a
# metadata id survive.
df = pd.merge(ratings, movies, left_on='movieId', right_on='id')

df = df.head(20000)  # keep it small for now

print(df.head())

print(movies.shape)
print(ratings.shape)

import numpy as np
import random

# Distinct user ids and movie ids present in the merged ratings frame.
users = df['userId'].unique()
movies_list = df['movieId'].unique()

# Raw id -> position lookup; the position is the row (user) or column
# (movie) of that id in the Q-table.
user_to_index = dict(zip(users, range(len(users))))
movie_to_index = dict(zip(movies_list, range(len(movies_list))))

# Q-table: one row per user, one column per movie, all zeros to start.
q_table = np.zeros(shape=(len(users), len(movies_list)))

print("Users:", len(users))
print("Movies:", len(movies_list))

def get_reward(user, movie, data=None):
    """Return the reward for recommending ``movie`` to ``user``.

    The reward is the user's rating of the movie, centred on 2.5 and scaled
    by 2.5 (so a 0-5 rating maps into [-1, 1]). If the user has no rating
    for the movie, a small fixed penalty is returned instead.

    Args:
        user: Value to match against the ``userId`` column.
        movie: Value to match against the ``movieId`` column.
        data: Optional DataFrame with ``userId``, ``movieId`` and ``rating``
            columns. Defaults to the module-level ``df``.

    Returns:
        float: ``(rating - 2.5) / 2.5`` when a rating exists, else ``-0.05``.
    """
    if data is None:
        data = df

    matches = data.loc[
        (data['userId'] == user) & (data['movieId'] == movie), 'rating'
    ]

    if matches.empty:
        return -0.05  # small penalty for unknown

    # Reduce to a scalar explicitly: float(Series) raises TypeError when the
    # pair matches more than one row and is deprecated even for one row.
    # Averaging gives a single well-defined value in both cases.
    rating = float(matches.mean())
    return (rating - 2.5) / 2.5

# Learning rate and discount factor for the Q update.
alpha, gamma = 0.1, 0.9
epsilon = 0.3  # more exploration

episodes = 1000

for ep in range(episodes):
    # Each episode is a single (user, movie) interaction for a random user.
    user = random.choice(users)
    user_idx = user_to_index[user]
    user_q = q_table[user_idx]  # row view: writes below land in q_table

    # epsilon-greedy: random column with probability epsilon, else the
    # column with the highest current Q-value for this user.
    explore = random.uniform(0, 1) < epsilon
    movie_idx = random.randrange(len(movies_list)) if explore else np.argmax(user_q)

    movie = movies_list[movie_idx]
    reward = get_reward(user, movie)

    # Q update: the target bootstraps from the best value in the same
    # user's row.
    td_target = reward + gamma * np.max(user_q)
    user_q[movie_idx] += alpha * (td_target - user_q[movie_idx])

    # reduce exploration slowly, never below 0.05
    # NOTE(review): assumed to run once per episode - confirm against the notebook.
    epsilon = max(0.05, epsilon * 0.995)

print("Training Done ✅")

def recommend(user_id, top_k=5):
    """Return up to ``top_k`` movie titles with the highest Q-values for a user.

    Args:
        user_id: Raw user id as it appears in ``user_to_index``.
        top_k: Maximum number of titles to return.

    Returns:
        list[str]: Titles ordered from highest to lowest Q-value. Returns
        ``["New user - no data"]`` when ``user_id`` is not in
        ``user_to_index`` and ``[]`` when ``top_k`` is not positive.
    """
    if user_id not in user_to_index:
        return ["New user - no data"]

    if top_k <= 0:
        # scores[-0:] would select the whole array and a negative top_k
        # would slice from the front, so handle both explicitly.
        return []

    scores = q_table[user_to_index[user_id]]

    # argsort is ascending: the last top_k positions hold the best scores.
    top_items = np.argsort(scores)[-top_k:]

    rec_movies = []
    for i in reversed(top_items):
        movie_id = movies_list[i]

        # Use the title from the first df row for this movie id.
        movie_row = df[df.movieId == movie_id]
        if not movie_row.empty:
            rec_movies.append(movie_row.iloc[0]['title'])

    return rec_movies

# Demo: print the Q-table based picks for user id 1.
print("Recommendations for User 1:")
print(recommend(user_id=1))

# Persist the current Q-table to q_table.npy in the working directory.
np.save(file="q_table.npy", arr=q_table)

def show_user_history(user_id):
    """Return the title and rating of the user's five highest-rated rows.

    Args:
        user_id: Value to match against the ``userId`` column of ``df``.

    Returns:
        DataFrame with columns ``title`` and ``rating``, at most five rows,
        sorted by rating in descending order.
    """
    rated_by_user = df[df['userId'] == user_id]
    best_first = rated_by_user.sort_values(by='rating', ascending=False)
    return best_first.loc[:, ['title', 'rating']].head(5)

print(show_user_history(user_id=1))

# Second training pass over the same Q-table; the reward of every episode
# is recorded. epsilon is left at whatever value it currently holds (this
# loop does not decay it).
rewards_log = []

for ep in range(episodes):
    user = random.choice(users)
    user_idx = user_to_index[user]
    user_q = q_table[user_idx]  # row view: writes below land in q_table

    # epsilon-greedy action selection
    explore = random.uniform(0, 1) < epsilon
    movie_idx = random.randrange(len(movies_list)) if explore else np.argmax(user_q)

    movie = movies_list[movie_idx]
    reward = get_reward(user, movie)
    rewards_log.append(reward)

    # Same Q update as the first training loop.
    td_target = reward + gamma * np.max(user_q)
    user_q[movie_idx] += alpha * (td_target - user_q[movie_idx])

import matplotlib.pyplot as plt

# Line plot of the reward recorded at each episode of the logged pass.
plt.plot(rewards_log)
axes = plt.gca()  # the axes plt.plot just drew on
axes.set(title="Reward over Time", xlabel="Episodes", ylabel="Reward")
plt.show()

def recommend_with_scores(user_id, top_k=5):
    """Return the user's ``top_k`` best movies together with their Q-values.

    Args:
        user_id: Raw user id as it appears in ``user_to_index``.
        top_k: Maximum number of entries to return.

    Returns:
        list[tuple]: ``(title, score)`` pairs ordered from highest to lowest
        score. Empty when ``user_id`` is not in ``user_to_index`` or when
        ``top_k`` is not positive.
    """
    # Guard unknown users the way the sibling recommenders do, instead of
    # failing with KeyError on the index lookup.
    if user_id not in user_to_index:
        return []

    if top_k <= 0:
        # scores[-0:] would select every item rather than none.
        return []

    scores = q_table[user_to_index[user_id]]

    # argsort is ascending: the last top_k positions hold the best scores.
    top_items = np.argsort(scores)[-top_k:]

    result = []
    for i in reversed(top_items):
        movie_id = movies_list[i]
        # Use the title from the first df row for this movie id.
        title = df[df.movieId == movie_id]['title'].iloc[0]
        result.append((title, scores[i]))

    return result


print(recommend_with_scores(1))

def popular_movies(top_k=5):
    """Return the ``top_k`` titles with the highest mean rating in ``df``.

    Ranking uses the mean rating only; the number of ratings a title has
    received is not taken into account.

    Returns:
        Series indexed by title holding the mean rating, highest first.
    """
    mean_by_title = df.groupby('title')['rating'].mean()
    ranked = mean_by_title.sort_values(ascending=False)
    return ranked.head(top_k)


print(popular_movies())

def recommend_clean(user_id, top_k=5):
    """Return up to ``top_k`` top-scoring titles the user has not rated yet.

    Args:
        user_id: Raw user id as it appears in ``user_to_index``.
        top_k: Maximum number of titles to return.

    Returns:
        list[str]: Titles ordered from highest to lowest Q-value, skipping
        every movie id that already appears in ``df`` for this user.
        Returns ``["New user"]`` when ``user_id`` is not in
        ``user_to_index`` and ``[]`` when ``top_k`` is not positive.
    """
    if user_id not in user_to_index:
        return ["New user"]

    if top_k <= 0:
        # Without this guard the length check below never equals top_k, so
        # the loop would return every unrated movie.
        return []

    scores = q_table[user_to_index[user_id]]

    # Movie ids this user already has a row for in df.
    watched = set(df[df.userId == user_id]['movieId'])

    rec_movies = []
    # Walk all movies from highest to lowest Q-value.
    for i in np.argsort(scores)[::-1]:
        movie_id = movies_list[i]
        if movie_id in watched:
            continue

        # Use the title from the first df row for this movie id.
        rec_movies.append(df[df.movieId == movie_id]['title'].iloc[0])

        if len(rec_movies) == top_k:
            break

    return rec_movies


print(recommend_clean(1))

def show_output(user_id):
    """Print the user's top-rated history, then the filtered recommendations.

    Args:
        user_id: Passed unchanged to ``show_user_history`` and
            ``recommend_clean``.
    """
    print("🎬 User's Favorite Movies:")
    favorites = show_user_history(user_id)
    print(favorites)

    print("\n🤖 Recommended Movies:")
    suggestions = recommend_clean(user_id)
    print(suggestions)


show_output(user_id=1)