From 71440b85bcabe8623b9aefa35b06305b5d3fc9c0 Mon Sep 17 00:00:00 2001 From: cavesdev <24899540+cavesdev@users.noreply.github.com> Date: Sun, 24 Oct 2021 15:17:22 -0500 Subject: [PATCH 1/2] - Added MovieRecommender class to comply with tests. - Added gitignore. - Modified dataset path in tests. - Modified compiler target and mahout dependency in pom. --- .gitignore | 8 + pom.xml | 10 +- .../nearsoft/academy/MovieRecommender.java | 160 ++++++++++++++++++ .../recommendation/MovieRecommenderTest.java | 4 +- 4 files changed, 177 insertions(+), 5 deletions(-) create mode 100644 .gitignore create mode 100644 src/main/java/nearsoft/academy/MovieRecommender.java diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..89c8107 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +data/ +target/ +.DS_Store +.classpath +.project +.settings/ +.vscode/ +src/.DS_Store \ No newline at end of file diff --git a/pom.xml b/pom.xml index 8169ff7..75c3da6 100644 --- a/pom.xml +++ b/pom.xml @@ -12,14 +12,16 @@ UTF-8 + 1.7 + 1.7 - org.apache.mahout - mahout-core - 0.9 - + org.apache.mahout + mahout-mr + 0.13.0 + junit junit diff --git a/src/main/java/nearsoft/academy/MovieRecommender.java b/src/main/java/nearsoft/academy/MovieRecommender.java new file mode 100644 index 0000000..db69e0a --- /dev/null +++ b/src/main/java/nearsoft/academy/MovieRecommender.java @@ -0,0 +1,160 @@ +package nearsoft.academy; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.zip.GZIPInputStream; + +import org.apache.log4j.BasicConfigurator; +import org.apache.mahout.cf.taste.impl.model.file.FileDataModel; +import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood; +import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender; +import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood; +import org.apache.mahout.cf.taste.recommender.RecommendedItem; +import org.apache.mahout.cf.taste.recommender.UserBasedRecommender; +import org.apache.mahout.cf.taste.similarity.UserSimilarity; + +public class MovieRecommender { + final String CSV_PATH = "data/dataset.csv"; + int totalUsers = 0; + int totalProducts = 0; + int totalReviews = 0; + + private Map users= new HashMap(); + private Map products = new HashMap(); + private Map productsReverse = new HashMap(); + private UserBasedRecommender recommender; + + public MovieRecommender(String datasetPath) { + BasicConfigurator.configure(); + try { + BufferedReader file = this.readGZFile(datasetPath); + processFileData(file); + DataModel model = new FileDataModel(new File(CSV_PATH)); + UserSimilarity similarity = new PearsonCorrelationSimilarity(model); + UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model); + recommender = new GenericUserBasedRecommender(model, neighborhood, similarity); + } catch (Exception e) { + System.err.println(e.getLocalizedMessage()); + } + + return; + } + + private BufferedReader readGZFile(String datasetPath) throws IOException { + FileInputStream file = new FileInputStream(datasetPath); + GZIPInputStream gz = new GZIPInputStream(file); + InputStreamReader reader = new InputStreamReader(gz); + BufferedReader br = new BufferedReader(reader); + return br; + } + + private void processFileData(BufferedReader file) throws IOException { + List requiredFields = Arrays.asList("review/userId:", "product/productId:", "review/score:"); + + String user = null; + String product = null; + String review = null; + String data; + int dataCount = 0; + + File csvFile = new File(CSV_PATH); + + if (csvFile.exists()) { + csvFile.delete(); + } + + FileWriter fileWriter = new FileWriter(csvFile); + + String line = file.readLine(); + String[] split; + while (line != null) { + if (dataCount == 3) { + dataCount = 0; + data = this.users.get(user) + "," + this.products.get(product) + "," + review + "\n"; + fileWriter.write(data); + } + split = line.split(" "); + if (requiredFields.contains(split[0])) { + switch (split[0]) { + case "review/userId:": + user = split[1]; + dataCount++; + addUserCount(user); + break; + case "product/productId:": + product = split[1]; + dataCount++; + addProductCount(product); + break; + case "review/score:": + review = split[1]; + dataCount++; + this.totalReviews++; + break; + } + } + line = file.readLine(); + } + fileWriter.close(); + return; + } + + private void addUserCount(String user) { + if (!this.users.containsKey(user)) { + this.users.put(user, this.totalUsers); + this.totalUsers++; + } + return; + } + + private void addProductCount(String product) { + if (!this.products.containsKey(product)) { + this.products.put(product, this.totalProducts); + this.productsReverse.put(Long.valueOf(this.totalProducts), product); + this.totalProducts++; + } + return; + } + + public int getTotalReviews() { + return this.totalReviews; + } + + public int getTotalProducts() { + return this.products.size(); + } + + public int getTotalUsers() { + return this.users.size(); + } + + public List getRecommendationsForUser(String user) { + List rec; + List res = new ArrayList(); + String product; + try { + rec = recommender.recommend(this.users.get(user), 3); + } catch (Exception e) { + System.err.println(e.getLocalizedMessage()); + return null; + } + + for (RecommendedItem recommendation : rec) { + product = this.productsReverse.get(recommendation.getItemID()); + res.add(product); + } + + return res; + } +} diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java index 0d0b1fe..c47fe2c 100644 --- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java @@ -3,6 +3,8 @@ import org.apache.mahout.cf.taste.common.TasteException; import org.junit.Test; +import nearsoft.academy.MovieRecommender; + import java.io.IOException; import java.util.List; @@ -15,7 +17,7 @@ public class MovieRecommenderTest { public void testDataInfo() throws IOException, TasteException { //download movies.txt.gz from // http://snap.stanford.edu/data/web-Movies.html - MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz"); + MovieRecommender recommender = new MovieRecommender("data/movies.txt.gz"); assertEquals(7911684, recommender.getTotalReviews()); assertEquals(253059, recommender.getTotalProducts()); assertEquals(889176, recommender.getTotalUsers()); From 55eaeedc61b9163c1cda55f8979218dc976b07e2 Mon Sep 17 00:00:00 2001 From: cavesdev <24899540+cavesdev@users.noreply.github.com> Date: Sun, 24 Oct 2021 15:23:05 -0500 Subject: [PATCH 2/2] Updated readme --- readme.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/readme.md b/readme.md index ce4dc89..9b0a782 100644 --- a/readme.md +++ b/readme.md @@ -9,8 +9,9 @@ This repo contains several common big data exercises. ## Setup -1. Install the JDK 7.0 +1. Install the JDK 17 2. [Download & Install Maven](http://maven.apache.org/download.cgi) +3. Add `movies.txt.gz` file to `/data` folder on root. ## How to run tests