Spaces:

Wintersmith
/

Book_recommender

Runtime error

App Files Files Community

Book_recommender / recommender_system.py

Wintersmith

Upload recommender_system.py

88d6858 verified over 1 year ago

raw

history blame contribute delete

3.89 kB

	import pandas as pd
	import numpy as np
	from fuzzywuzzy import process


	def match_books(user_input: str, df: pd.DataFrame, min_score: float = 0.8):
	    """Return the title in ``df`` that best fuzzy-matches ``user_input``.

	    Args:
	        user_input: Free-text title typed by the user.
	        df: Frame with a ``'Book-Title'`` column holding the known titles.
	        min_score: Minimum similarity required to accept the match. A value
	            in ``[0, 1]`` is read as a fraction (``0.8`` means 80 %); a
	            larger value is taken as-is on the scorer's 0-100 scale.

	    Returns:
	        The best-matching title, or ``None`` when the input is blank, there
	        are no titles to match against, or the best score is below
	        ``min_score``.
	    """
	    # Nothing sensible can be matched against blank input.
	    if not user_input or not user_input.strip():
	        return None

	    # Missing titles are dropped so the scorer only ever sees real values.
	    book_titles = df['Book-Title'].dropna().unique()
	    if len(book_titles) == 0:
	        return None

	    # fuzzywuzzy scores are integers from 0 to 100, so a fractional
	    # threshold such as the default 0.8 has to be scaled before comparing;
	    # compared unscaled it would accept practically every candidate.
	    cutoff = min_score * 100 if 0 <= min_score <= 1 else min_score

	    # extractOne yields a (title, score) pair for the closest candidate.
	    best_match = process.extractOne(user_input, book_titles)
	    if best_match and best_match[1] >= cutoff:
	        return best_match[0]
	    return None


	def recommend_books(df: pd.DataFrame, book_to_be_recommended: str) -> pd.DataFrame:
	    """Recommend books whose ratings correlate with those of a given book.

	    The function collects the users who rated ``book_to_be_recommended``,
	    gathers every book those users rated, keeps the most frequently rated
	    ones, and correlates each remaining book's ratings with the ratings of
	    the requested book.

	    Args:
	        df: Ratings frame with ``'User-ID'``, ``'Book-Title'`` and
	            ``'Book-Rating'`` columns.
	        book_to_be_recommended: Title to base the recommendations on; it is
	            compared to ``'Book-Title'`` with exact equality.

	    Returns:
	        A frame with the columns ``'Book-Title'``, ``'Correlation [%]'``
	        (Pearson correlation times 100, rounded to two decimals) and
	        ``'Average ratings'``, sorted by correlation in descending order,
	        without the requested book, and limited to 10 rows. The frame is
	        empty when the requested book has no ratings in ``df``.

	    Side effects:
	        Prints one progress line per threshold tried.
	    """
	    result_columns = ['Book-Title', 'Correlation [%]', 'Average ratings']

	    # Users who rated the requested book.
	    is_target = df['Book-Title'] == book_to_be_recommended
	    book_readers = df.loc[is_target, 'User-ID'].unique()
	    if len(book_readers) == 0:
	        # Unknown title: there is nothing to correlate against.
	        return pd.DataFrame(columns=result_columns)

	    # Every rating given by those users, and how many ratings each book got.
	    books_of_book_readers = df[df['User-ID'].isin(book_readers)]
	    number_of_rating_per_book = (
	        books_of_book_readers.groupby('Book-Title')['User-ID'].count().reset_index()
	    )
	    rating_counts = number_of_rating_per_book['User-ID']

	    # Raise the minimum rating count until at most 11 books survive, then
	    # step back by one so that the final selection stays large enough.
	    threshold = 0
	    while True:
	        n_books = int((rating_counts >= threshold).sum())
	        print(f"Threshold: {threshold}, Number of books to compare: {n_books}")
	        if n_books <= 11:
	            break
	        threshold += 1

	    # The requested book is always kept: it supplies the reference column
	    # for the correlation, so dropping it would make the lookup below fail.
	    keep = (rating_counts >= threshold - 1) | (
	        number_of_rating_per_book['Book-Title'] == book_to_be_recommended
	    )
	    books_to_compare = number_of_rating_per_book.loc[keep, 'Book-Title']

	    ratings_data_raw = books_of_book_readers.loc[
	        books_of_book_readers['Book-Title'].isin(books_to_compare),
	        ['User-ID', 'Book-Rating', 'Book-Title'],
	    ]

	    # Average repeated ratings so each (user, book) pair occurs once, which
	    # pivot() requires.
	    ratings_data_raw_nodup = (
	        ratings_data_raw.groupby(['User-ID', 'Book-Title'])['Book-Rating']
	        .mean()
	        .reset_index()
	    )

	    # One row per user, one column per book.
	    dataset_for_corr = ratings_data_raw_nodup.pivot(
	        index='User-ID', columns='Book-Title', values='Book-Rating'
	    )
	    if book_to_be_recommended not in dataset_for_corr.columns:
	        # Can happen when the grouping discarded all of the book's rows
	        # (for example rows with a missing User-ID).
	        return pd.DataFrame(columns=result_columns)

	    # Correlate every book column with the requested book's column.
	    correlations = dataset_for_corr.corrwith(
	        dataset_for_corr[book_to_be_recommended], method='pearson'
	    )

	    # Mean rating per book among the selected users.
	    average_ratings = (
	        ratings_data_raw_nodup.groupby('Book-Title')['Book-Rating'].mean().reset_index()
	    )

	    correlations_df = pd.DataFrame({
	        'Book-Title': correlations.index,
	        'Correlation [%]': correlations.values,
	    })
	    correlations_df = pd.merge(correlations_df, average_ratings, on='Book-Title')
	    correlations_df = correlations_df.rename(columns={'Book-Rating': 'Average ratings'})

	    # Best matches first.
	    correlations_df = correlations_df.sort_values('Correlation [%]', ascending=False)

	    # Express the correlation as a percentage with two decimals.
	    correlations_df['Correlation [%]'] = (correlations_df['Correlation [%]'] * 100).round(2)

	    # The requested book trivially correlates with itself; leave it out.
	    correlations_df = correlations_df[correlations_df['Book-Title'] != book_to_be_recommended]
	    return correlations_df.head(10)