Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Big Data Problem Tests Passed #71

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
.project
.classpath
.settings/
.vscode/
target/
fileDataModel.csv
movies.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
package nearsoft.academy.bigdata.recommendation;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.zip.GZIPInputStream;

import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.UserBasedRecommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;

import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;

/**
 * User-based movie recommender built from a gzip-compressed review dump.
 *
 * <p>Construction happens in two passes, both writing into the current working
 * directory:
 * <ol>
 *   <li>the archive is decompressed to {@code movies.csv};</li>
 *   <li>that text is rewritten as {@code fileDataModel.csv}, one
 *       {@code userIndex,movieIndex,score} line per review.</li>
 * </ol>
 * User and product identifiers found in the dump are strings, so each distinct
 * identifier is assigned a numeric index (in order of first appearance) and the
 * mapping is kept in {@link #users} and {@link #movies} to translate
 * recommendations back to product identifiers.
 *
 * <p>This class is not designed for concurrent use.
 */
public class MovieRecommender {
    /** Name of the decompressed copy of the input archive. */
    private static final String DECOMPRESSED_FILE = "movies.csv";
    /** Name of the generated numeric data model file. */
    private static final String DATA_MODEL_FILE = "fileDataModel.csv";

    private static final String PRODUCT_KEY = "product/productId";
    private static final String USER_KEY = "review/userId";
    private static final String SCORE_KEY = "review/score";

    /** Similarity threshold passed to the user neighborhood. */
    private static final double NEIGHBORHOOD_THRESHOLD = 0.1;
    /** Number of items requested from the recommender per call. */
    private static final int RECOMMENDATION_COUNT = 3;

    private String path;
    private String csvPath;
    private String dataFileM;
    /** Built lazily on the first recommendation request and reused afterwards. */
    private UserBasedRecommender recommender;
    BiMap<String, Long> users;
    BiMap<String, Integer> movies;
    private int totalReviews;

    /**
     * Decompresses and indexes the review dump.
     *
     * @param in_path path to the gzip-compressed review file
     * @throws IOException if the archive cannot be read, an output file cannot be
     *         written, or a record in the dump is malformed
     * @throws TasteException declared for caller compatibility
     */
    public MovieRecommender(String in_path) throws IOException, TasteException {
        path = in_path;
        users = HashBiMap.create();
        movies = HashBiMap.create();
        totalReviews = 0;

        csvPath = createCSVFile(path);
        dataFileM = parseCSVFile();
    }

    /**
     * Recommends up to three products for a user.
     *
     * @param in_user user identifier exactly as it appears in the review dump
     * @return product identifiers of the recommended items; empty when the user
     *         is unknown or nothing can be recommended
     * @throws IOException if the data model file cannot be loaded
     * @throws TasteException if the recommender fails
     */
    public List<String> getRecommendationsForUser(String in_user) throws IOException, TasteException {
        List<String> outputList = new ArrayList<String>();
        Long indexUser = users.get(in_user);
        if (indexUser == null) {
            // Unknown user: unboxing null below would throw a NullPointerException.
            return outputList;
        }

        System.out.println("Generation recommendations, please wait:");
        if (recommender == null) {
            // The model file never changes after construction, so load it once.
            recommender = buildRecommender();
        }

        BiMap<Integer, String> invertedMovies = movies.inverse();
        List<RecommendedItem> recommendations =
                recommender.recommend(indexUser.longValue(), RECOMMENDATION_COUNT);

        for (RecommendedItem recommendation : recommendations) {
            // Item ids in the model were written from int indices, so narrowing is safe.
            String movieID = invertedMovies.get(Integer.valueOf((int) recommendation.getItemID()));
            System.out.println(movieID);
            outputList.add(movieID);
        }
        return outputList;
    }

    /** Loads the generated data model and wires up the user-based recommender. */
    private UserBasedRecommender buildRecommender() throws IOException, TasteException {
        DataModel model = new FileDataModel(new File(dataFileM));
        UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
        UserNeighborhood neighborhood =
                new ThresholdUserNeighborhood(NEIGHBORHOOD_THRESHOLD, similarity, model);
        return new GenericUserBasedRecommender(model, neighborhood, similarity);
    }

    /**
     * Decompresses the gzip archive at {@code in_path} into {@code movies.csv}.
     *
     * @return absolute path of the decompressed file
     */
    private String createCSVFile(String in_path) throws IOException {
        File csv = new File(DECOMPRESSED_FILE);
        // FileOutputStream truncates an existing file, so no delete/create is needed.
        try (FileInputStream fileInput = new FileInputStream(in_path);
             GZIPInputStream gzis = new GZIPInputStream(fileInput);
             FileOutputStream fileOutput = new FileOutputStream(csv)) {
            byte[] buffer = new byte[8192];
            int length;
            while ((length = gzis.read(buffer)) != -1) {
                fileOutput.write(buffer, 0, length);
            }
        }
        return csv.getAbsolutePath();
    }

    /**
     * Converts the decompressed dump into {@code fileDataModel.csv}.
     *
     * <p>A record contributes one output line when its score line is reached; the
     * product and user lines of the record must have been seen before it. Any
     * I/O failure is propagated so a truncated model is never used silently.
     *
     * @return absolute path of the generated data model file
     */
    private String parseCSVFile() throws IOException {
        File fileDataModel = new File(DATA_MODEL_FILE);
        try (BufferedReader reader = new BufferedReader(new FileReader(csvPath));
             BufferedWriter fileWriter = new BufferedWriter(new FileWriter(fileDataModel))) {
            Integer currentMovie = null;
            Long currentUser = null;
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith(PRODUCT_KEY)) {
                    currentMovie = indexOfMovie(fieldValue(line));
                } else if (line.startsWith(USER_KEY)) {
                    currentUser = indexOfUser(fieldValue(line));
                } else if (line.startsWith(SCORE_KEY)) {
                    if (currentMovie == null || currentUser == null) {
                        throw new IOException(
                                "Review score without a preceding product and user: " + line);
                    }
                    totalReviews++;
                    fileWriter.write(currentUser + "," + currentMovie + "," + fieldValue(line) + "\n");
                    // Reset so an incomplete next record cannot reuse stale ids.
                    currentMovie = null;
                    currentUser = null;
                }
            }
        }
        return fileDataModel.getAbsolutePath();
    }

    /** Returns the numeric index for a product id, assigning the next free one if new. */
    private Integer indexOfMovie(String productId) {
        Integer index = movies.get(productId);
        if (index == null) {
            index = Integer.valueOf(movies.size());
            movies.put(productId, index);
        }
        return index;
    }

    /** Returns the numeric index for a user id, assigning the next free one if new. */
    private Long indexOfUser(String userId) {
        Long index = users.get(userId);
        if (index == null) {
            index = Long.valueOf(users.size());
            users.put(userId, index);
        }
        return index;
    }

    /**
     * Extracts the second space-delimited token of a {@code key: value} line.
     *
     * @throws IOException if the line carries no value
     */
    private static String fieldValue(String line) throws IOException {
        int start = line.indexOf(' ') + 1;
        int end = line.indexOf(' ', start);
        String value = (start == 0) ? "" : (end == -1 ? line.substring(start) : line.substring(start, end));
        if (value.isEmpty()) {
            throw new IOException("Malformed line (missing value): " + line);
        }
        return value;
    }

    /** @return number of distinct products seen in the dump */
    public int getTotalProducts() {
        return movies.size();
    }

    /** @return number of distinct users seen in the dump */
    public int getTotalUsers() {
        return users.size();
    }

    /** @return number of review score lines seen in the dump */
    public int getTotalReviews() {
        return totalReviews;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,24 @@

public class MovieRecommenderTest {
@Test
public void testDataInfo() throws IOException, TasteException {
//download movies.txt.gz from
// http://snap.stanford.edu/data/web-Movies.html
public static void testDataInfo() throws IOException, TasteException {
// download movies.txt.gz from
// http://snap.stanford.edu/data/web-Movies.html
MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz");
assertEquals(7911684, recommender.getTotalReviews());
assertEquals(253059, recommender.getTotalProducts());
assertEquals(889176, recommender.getTotalUsers());


assertEquals(7911684, recommender.getTotalReviews());
assertEquals(253059, recommender.getTotalProducts());
assertEquals(889176, recommender.getTotalUsers());

List<String> recommendations = recommender.getRecommendationsForUser("A141HP4LYPWMSR");
assertThat(recommendations, hasItem("B0002O7Y8U"));
assertThat(recommendations, hasItem("B00004CQTF"));
assertThat(recommendations, hasItem("B000063W82"));

assertThat(recommendations, hasItem("B000063W82"));
}

public static void main(String[] args) throws IOException, TasteException {
testDataInfo();
}
}