diff --git a/big-data.iml b/big-data.iml new file mode 100644 index 0000000..f267fe6 --- /dev/null +++ b/big-data.iml @@ -0,0 +1,57 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/pom.xml b/pom.xml index 8169ff7..57a6efe 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,19 @@ nearsoft.academy big-data 1.0-SNAPSHOT - jar + + + + org.apache.maven.plugins + maven-compiler-plugin + + 11 + 11 + + + + + jar big-data http://maven.apache.org @@ -26,5 +38,10 @@ 4.7 test + + org.slf4j + slf4j-simple + 1.7.30 + diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java new file mode 100644 index 0000000..2b055e1 --- /dev/null +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java @@ -0,0 +1,117 @@ +package nearsoft.academy.bigdata.recommendation; + +import org.apache.commons.collections.BidiMap; +import org.apache.commons.collections.bidimap.TreeBidiMap; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.model.file.FileDataModel; +import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood; +import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender; +import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood; +import org.apache.mahout.cf.taste.recommender.RecommendedItem; +import org.apache.mahout.cf.taste.recommender.UserBasedRecommender; +import org.apache.mahout.cf.taste.similarity.UserSimilarity; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.io.File; +import java.util.ArrayList; +import java.util.List; + +public class MovieRecommender { + private String txtPath; + private String csvPath; + private BidiMap userIds; + private BidiMap productsIds; + private int totalReviews; + private int totalProducts; + private int totalUsers; + + public MovieRecommender(String txtPath) throws IOException { + this.txtPath = txtPath; + this.csvPath = "data/movies.csv"; + this.userIds = new TreeBidiMap(); + this.productsIds = new TreeBidiMap(); + this.totalReviews = 0; + this.totalProducts = 0; + this.totalUsers = 0; + this.convertFileToCSV(); + } + + public int getTotalReviews() { + return this.totalReviews; + } + + public int getTotalProducts() { + return this.totalProducts; + } + + public int getTotalUsers() { + return this.totalUsers; + } + + public List getRecommendationsForUser(String user) throws IOException, TasteException { + UserBasedRecommender recommender = createRecommender(this.csvPath); + int userKey = Integer.parseInt(userIds.get(user).toString()); + + List recommendations = recommender.recommend(userKey, 10); + List recommendedMovies = new ArrayList<>(); + String product; + for (RecommendedItem recommendation : recommendations) { + product = this.productsIds.getKey((int) recommendation.getItemID()).toString(); + recommendedMovies.add(product); + } + + return recommendedMovies; + } + + private UserBasedRecommender createRecommender(String path) throws IOException, TasteException { + DataModel model = new FileDataModel(new File(path)); + UserSimilarity similarity = new PearsonCorrelationSimilarity(model); + UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model); + UserBasedRecommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity); + + return recommender; + } + + public void convertFileToCSV() throws IOException { + BufferedReader br = new BufferedReader(new FileReader(this.txtPath)); + BufferedWriter bw = new BufferedWriter(new FileWriter(this.csvPath)); + + String line; + String[] newLine = {"", "", ""}; + while((line = br.readLine()) != null) { + String[] values = line.split(": ", -1); + if(values[0].equals("review/userId")) { + if(this.userIds.get(values[1]) == null) { + this.userIds.put(values[1], this.totalUsers); + newLine[0] = this.userIds.get(values[1]).toString(); + this.totalUsers++; + } else { + newLine[0] = this.userIds.get(values[1]).toString(); + } + } + if(values[0].equals("product/productId")) { + if(this.productsIds.get(values[1]) == null) { + this.productsIds.put(values[1], this.totalProducts); + newLine[1] = this.productsIds.get(values[1]).toString(); + this.totalProducts++; + } else { + newLine[1] = this.productsIds.get(values[1]).toString(); + } + } + if(values[0].equals("review/score")) { + newLine[2] = values[1]; + bw.write(newLine[0] + "," + newLine[1] + "," + newLine[2] + '\n'); + this.totalReviews++; + } + } + br.close(); + bw.close(); + } + +} diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java index 0d0b1fe..a6127ed 100644 --- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java @@ -15,7 +15,8 @@ public class MovieRecommenderTest { public void testDataInfo() throws IOException, TasteException { //download movies.txt.gz from // http://snap.stanford.edu/data/web-Movies.html - MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz"); + + MovieRecommender recommender = new MovieRecommender("data/movies.txt"); assertEquals(7911684, recommender.getTotalReviews()); assertEquals(253059, recommender.getTotalProducts()); assertEquals(889176, recommender.getTotalUsers()); diff --git a/target/test-classes/nearsoft/academy/bigdata/recommendation/MovieRecommender.class b/target/test-classes/nearsoft/academy/bigdata/recommendation/MovieRecommender.class new file mode 100644 index 0000000..3e96084 Binary files /dev/null and b/target/test-classes/nearsoft/academy/bigdata/recommendation/MovieRecommender.class differ diff --git a/target/test-classes/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.class b/target/test-classes/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.class new file mode 100644 index 0000000..d142ee0 Binary files /dev/null and b/target/test-classes/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.class differ