From 71440b85bcabe8623b9aefa35b06305b5d3fc9c0 Mon Sep 17 00:00:00 2001
From: cavesdev <24899540+cavesdev@users.noreply.github.com>
Date: Sun, 24 Oct 2021 15:17:22 -0500
Subject: [PATCH 1/2] - Added MovieRecommender class to comply with tests. -
Added gitignore. - Modified dataset path in tests. - Modified compiler target
and mahout dependency in pom.
---
.gitignore | 8 +
pom.xml | 10 +-
.../nearsoft/academy/MovieRecommender.java | 160 ++++++++++++++++++
.../recommendation/MovieRecommenderTest.java | 4 +-
4 files changed, 177 insertions(+), 5 deletions(-)
create mode 100644 .gitignore
create mode 100644 src/main/java/nearsoft/academy/MovieRecommender.java
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..89c8107
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,8 @@
+data/
+target/
+.DS_Store
+.classpath
+.project
+.settings/
+.vscode/
+src/.DS_Store
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index 8169ff7..75c3da6 100644
--- a/pom.xml
+++ b/pom.xml
@@ -12,14 +12,16 @@
UTF-8
+ 1.7
+ 1.7
- org.apache.mahout
- mahout-core
- 0.9
-
+ org.apache.mahout
+ mahout-mr
+ 0.13.0
+
junit
junit
diff --git a/src/main/java/nearsoft/academy/MovieRecommender.java b/src/main/java/nearsoft/academy/MovieRecommender.java
new file mode 100644
index 0000000..db69e0a
--- /dev/null
+++ b/src/main/java/nearsoft/academy/MovieRecommender.java
@@ -0,0 +1,228 @@
+package nearsoft.academy;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.zip.GZIPInputStream;
+
+import org.apache.log4j.BasicConfigurator;
+import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
+import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
+import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
+import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.UserBasedRecommender;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+
+/**
+ * Builds a user-based recommender from a gzip-compressed review dump.
+ *
+ * <p>The constructor reads the dump once, gives every distinct user and product a sequential
+ * numeric id, writes one {@code user,product,score} row per review to {@code data/dataset.csv},
+ * and loads that CSV into a Mahout {@code FileDataModel}.
+ */
+public class MovieRecommender {
+    /** Intermediate CSV handed to {@code FileDataModel}; rewritten on every construction. */
+    final String CSV_PATH = "data/dataset.csv";
+    int totalUsers = 0;
+    int totalProducts = 0;
+    int totalReviews = 0;
+
+    /** Raw user id from the dump mapped to its numeric id. */
+    private Map<String, Integer> users = new HashMap<>();
+    /** Raw product id from the dump mapped to its numeric id. */
+    private Map<String, Integer> products = new HashMap<>();
+    /** Numeric product id mapped back to the raw product id. */
+    private Map<Long, String> productsReverse = new HashMap<>();
+    /** Stays {@code null} when construction failed. */
+    private UserBasedRecommender recommender;
+
+    /**
+     * Parses the dump at {@code datasetPath} and prepares the recommender.
+     *
+     * <p>Failures are reported on {@code System.err} rather than thrown; the recommender is then
+     * left unset and the totals reflect whatever was read before the failure.
+     *
+     * @param datasetPath path to the gzip-compressed review dump
+     */
+    public MovieRecommender(String datasetPath) {
+        BasicConfigurator.configure();
+        try {
+            // Close the dump as soon as parsing ends, including when it fails part-way.
+            try (BufferedReader file = this.readGZFile(datasetPath)) {
+                processFileData(file);
+            }
+            DataModel model = new FileDataModel(new File(CSV_PATH));
+            UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
+            UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model);
+            recommender = new GenericUserBasedRecommender(model, neighborhood, similarity);
+        } catch (Exception e) {
+            System.err.println(e.getLocalizedMessage());
+        }
+    }
+
+    /**
+     * Opens a gzip-compressed text file for line-by-line reading.
+     *
+     * @param datasetPath path to the gzip file
+     * @return a reader over the decompressed content; the caller must close it
+     * @throws IOException if the file cannot be opened or its gzip header is invalid
+     */
+    private BufferedReader readGZFile(String datasetPath) throws IOException {
+        FileInputStream file = new FileInputStream(datasetPath);
+        try {
+            return new BufferedReader(new InputStreamReader(new GZIPInputStream(file)));
+        } catch (IOException e) {
+            // GZIPInputStream reads the header in its constructor; release the handle if it fails.
+            file.close();
+            throw e;
+        }
+    }
+
+    /**
+     * Converts the review dump into CSV rows and updates the user, product and review totals.
+     *
+     * <p>Only lines whose first space-separated token is {@code review/userId:},
+     * {@code product/productId:} or {@code review/score:} are used. A row is written as soon as
+     * three such fields have been collected, so the final review is emitted even when no line
+     * follows it.
+     *
+     * @param file reader over the decompressed dump; not closed by this method
+     * @throws IOException if reading or writing fails, or a used field has no value
+     */
+    private void processFileData(BufferedReader file) throws IOException {
+        String user = null;
+        String product = null;
+        String review = null;
+        int dataCount = 0;
+
+        File csvFile = new File(CSV_PATH);
+        File csvDir = csvFile.getParentFile();
+        if (csvDir != null) {
+            // The dump may live elsewhere, so the output folder is not guaranteed to exist.
+            csvDir.mkdirs();
+        }
+
+        // FileWriter truncates an existing file, so no explicit delete is needed.
+        try (FileWriter fileWriter = new FileWriter(csvFile)) {
+            String line;
+            while ((line = file.readLine()) != null) {
+                String[] split = line.split(" ");
+                // A line made only of spaces splits into an empty array.
+                String label = split.length > 0 ? split[0] : "";
+                switch (label) {
+                    case "review/userId:":
+                        user = fieldValue(split, line);
+                        dataCount++;
+                        addUserCount(user);
+                        break;
+                    case "product/productId:":
+                        product = fieldValue(split, line);
+                        dataCount++;
+                        addProductCount(product);
+                        break;
+                    case "review/score:":
+                        review = fieldValue(split, line);
+                        dataCount++;
+                        this.totalReviews++;
+                        break;
+                    default:
+                        break;
+                }
+                if (dataCount == 3) {
+                    dataCount = 0;
+                    String data = this.users.get(user) + "," + this.products.get(product) + ","
+                            + review + "\n";
+                    fileWriter.write(data);
+                }
+            }
+        }
+    }
+
+    /**
+     * Returns the value token of a {@code label: value} line.
+     *
+     * @param split the line split on single spaces
+     * @param line the original line, used only for the error message
+     * @return the token that follows the label
+     * @throws IOException if the line carries a label but no value
+     */
+    private static String fieldValue(String[] split, String line) throws IOException {
+        if (split.length < 2) {
+            throw new IOException("Missing value in dataset line: " + line);
+        }
+        return split[1];
+    }
+
+    /** Assigns the next numeric id to {@code user} the first time it is seen. */
+    private void addUserCount(String user) {
+        if (!this.users.containsKey(user)) {
+            this.users.put(user, this.totalUsers);
+            this.totalUsers++;
+        }
+    }
+
+    /** Assigns the next numeric id to a new {@code product} and records the reverse mapping. */
+    private void addProductCount(String product) {
+        if (!this.products.containsKey(product)) {
+            this.products.put(product, this.totalProducts);
+            this.productsReverse.put(Long.valueOf(this.totalProducts), product);
+            this.totalProducts++;
+        }
+    }
+
+    /** @return the number of review scores read from the dump */
+    public int getTotalReviews() {
+        return this.totalReviews;
+    }
+
+    /** @return the number of distinct products read from the dump */
+    public int getTotalProducts() {
+        return this.products.size();
+    }
+
+    /** @return the number of distinct users read from the dump */
+    public int getTotalUsers() {
+        return this.users.size();
+    }
+
+    /**
+     * Recommends up to three products for a user.
+     *
+     * @param user raw user id as it appears in the dump
+     * @return raw product ids of the recommendations, or {@code null} when the user is unknown,
+     *     the recommender was not built, or the recommender fails
+     */
+    public List<String> getRecommendationsForUser(String user) {
+        Integer userId = this.users.get(user);
+        if (recommender == null || userId == null) {
+            // Guard explicitly instead of relying on a NullPointerException being caught.
+            System.err.println("No recommendations available for user: " + user);
+            return null;
+        }
+
+        List<RecommendedItem> rec;
+        try {
+            rec = recommender.recommend(userId.longValue(), 3);
+        } catch (Exception e) {
+            System.err.println(e.getLocalizedMessage());
+            return null;
+        }
+
+        List<String> res = new ArrayList<>(rec.size());
+        for (RecommendedItem recommendation : rec) {
+            res.add(this.productsReverse.get(recommendation.getItemID()));
+        }
+        return res;
+    }
+}
diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java
index 0d0b1fe..c47fe2c 100644
--- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java
+++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java
@@ -3,6 +3,8 @@
import org.apache.mahout.cf.taste.common.TasteException;
import org.junit.Test;
+import nearsoft.academy.MovieRecommender;
+
import java.io.IOException;
import java.util.List;
@@ -15,7 +17,7 @@ public class MovieRecommenderTest {
public void testDataInfo() throws IOException, TasteException {
//download movies.txt.gz from
// http://snap.stanford.edu/data/web-Movies.html
- MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz");
+ MovieRecommender recommender = new MovieRecommender("data/movies.txt.gz");
assertEquals(7911684, recommender.getTotalReviews());
assertEquals(253059, recommender.getTotalProducts());
assertEquals(889176, recommender.getTotalUsers());
From 55eaeedc61b9163c1cda55f8979218dc976b07e2 Mon Sep 17 00:00:00 2001
From: cavesdev <24899540+cavesdev@users.noreply.github.com>
Date: Sun, 24 Oct 2021 15:23:05 -0500
Subject: [PATCH 2/2] Updated readme
---
readme.md | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/readme.md b/readme.md
index ce4dc89..9b0a782 100644
--- a/readme.md
+++ b/readme.md
@@ -9,8 +9,9 @@ This repo contains several common big data exercises.
## Setup
-1. Install the JDK 7.0
+1. Install the JDK 17
2. [Download & Install Maven](http://maven.apache.org/download.cgi)
+3. Place the `movies.txt.gz` file in the `data/` folder at the repository root.
## How to run tests