diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..121f323 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Datasource local storage ignored files +/../../../../../../../../../:\Users\EAGB22\Documents\Nearsoft Academy\Reset Phase 28-Sep - 26-Oct\Big-Data-Exercises\academy-exercises\.idea/dataSources/ +/dataSources.local.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/.idea/.name b/.idea/.name new file mode 100644 index 0000000..a6f69a2 --- /dev/null +++ b/.idea/.name @@ -0,0 +1 @@ +big-data \ No newline at end of file diff --git a/.idea/compiler.xml b/.idea/compiler.xml new file mode 100644 index 0000000..a95a2ab --- /dev/null +++ b/.idea/compiler.xml @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/encodings.xml b/.idea/encodings.xml new file mode 100644 index 0000000..4140949 --- /dev/null +++ b/.idea/encodings.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.idea/jarRepositories.xml b/.idea/jarRepositories.xml new file mode 100644 index 0000000..a468a99 --- /dev/null +++ b/.idea/jarRepositories.xml @@ -0,0 +1,20 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__asm_asm_3_1.xml b/.idea/libraries/Maven__asm_asm_3_1.xml new file mode 100644 index 0000000..50a4cd4 --- /dev/null +++ b/.idea/libraries/Maven__asm_asm_3_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_google_guava_guava_16_0.xml b/.idea/libraries/Maven__com_google_guava_guava_16_0.xml new file mode 100644 index 0000000..3ecdbf4 --- /dev/null +++ b/.idea/libraries/Maven__com_google_guava_guava_16_0.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_sun_jersey_jersey_core_1_8.xml b/.idea/libraries/Maven__com_sun_jersey_jersey_core_1_8.xml new file mode 
100644 index 0000000..05b19db --- /dev/null +++ b/.idea/libraries/Maven__com_sun_jersey_jersey_core_1_8.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_sun_jersey_jersey_json_1_8.xml b/.idea/libraries/Maven__com_sun_jersey_jersey_json_1_8.xml new file mode 100644 index 0000000..8c3e9ee --- /dev/null +++ b/.idea/libraries/Maven__com_sun_jersey_jersey_json_1_8.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_sun_jersey_jersey_server_1_8.xml b/.idea/libraries/Maven__com_sun_jersey_jersey_server_1_8.xml new file mode 100644 index 0000000..08eb4d0 --- /dev/null +++ b/.idea/libraries/Maven__com_sun_jersey_jersey_server_1_8.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_sun_xml_bind_jaxb_impl_2_2_3_1.xml b/.idea/libraries/Maven__com_sun_xml_bind_jaxb_impl_2_2_3_1.xml new file mode 100644 index 0000000..6b533c5 --- /dev/null +++ b/.idea/libraries/Maven__com_sun_xml_bind_jaxb_impl_2_2_3_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__com_thoughtworks_xstream_xstream_1_4_4.xml b/.idea/libraries/Maven__com_thoughtworks_xstream_xstream_1_4_4.xml new file mode 100644 index 0000000..67ef426 --- /dev/null +++ b/.idea/libraries/Maven__com_thoughtworks_xstream_xstream_1_4_4.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_beanutils_commons_beanutils_1_7_0.xml b/.idea/libraries/Maven__commons_beanutils_commons_beanutils_1_7_0.xml new file mode 100644 index 0000000..e391de3 --- /dev/null +++ b/.idea/libraries/Maven__commons_beanutils_commons_beanutils_1_7_0.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_beanutils_commons_beanutils_core_1_8_0.xml 
b/.idea/libraries/Maven__commons_beanutils_commons_beanutils_core_1_8_0.xml new file mode 100644 index 0000000..acd442c --- /dev/null +++ b/.idea/libraries/Maven__commons_beanutils_commons_beanutils_core_1_8_0.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_cli_commons_cli_1_2.xml b/.idea/libraries/Maven__commons_cli_commons_cli_1_2.xml new file mode 100644 index 0000000..5012ae2 --- /dev/null +++ b/.idea/libraries/Maven__commons_cli_commons_cli_1_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_codec_commons_codec_1_4.xml b/.idea/libraries/Maven__commons_codec_commons_codec_1_4.xml new file mode 100644 index 0000000..6125f1c --- /dev/null +++ b/.idea/libraries/Maven__commons_codec_commons_codec_1_4.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_collections_commons_collections_3_2_1.xml b/.idea/libraries/Maven__commons_collections_commons_collections_3_2_1.xml new file mode 100644 index 0000000..ae2764e --- /dev/null +++ b/.idea/libraries/Maven__commons_collections_commons_collections_3_2_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_configuration_commons_configuration_1_6.xml b/.idea/libraries/Maven__commons_configuration_commons_configuration_1_6.xml new file mode 100644 index 0000000..6081aef --- /dev/null +++ b/.idea/libraries/Maven__commons_configuration_commons_configuration_1_6.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_digester_commons_digester_1_8.xml b/.idea/libraries/Maven__commons_digester_commons_digester_1_8.xml new file mode 100644 index 0000000..c33d92f --- /dev/null +++ b/.idea/libraries/Maven__commons_digester_commons_digester_1_8.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No 
newline at end of file diff --git a/.idea/libraries/Maven__commons_el_commons_el_1_0.xml b/.idea/libraries/Maven__commons_el_commons_el_1_0.xml new file mode 100644 index 0000000..28d81bc --- /dev/null +++ b/.idea/libraries/Maven__commons_el_commons_el_1_0.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_httpclient_commons_httpclient_3_0_1.xml b/.idea/libraries/Maven__commons_httpclient_commons_httpclient_3_0_1.xml new file mode 100644 index 0000000..7253720 --- /dev/null +++ b/.idea/libraries/Maven__commons_httpclient_commons_httpclient_3_0_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_io_commons_io_2_1.xml b/.idea/libraries/Maven__commons_io_commons_io_2_1.xml new file mode 100644 index 0000000..6c877dd --- /dev/null +++ b/.idea/libraries/Maven__commons_io_commons_io_2_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_lang_commons_lang_2_4.xml b/.idea/libraries/Maven__commons_lang_commons_lang_2_4.xml new file mode 100644 index 0000000..434851f --- /dev/null +++ b/.idea/libraries/Maven__commons_lang_commons_lang_2_4.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_logging_commons_logging_1_0_3.xml b/.idea/libraries/Maven__commons_logging_commons_logging_1_0_3.xml new file mode 100644 index 0000000..66bfbba --- /dev/null +++ b/.idea/libraries/Maven__commons_logging_commons_logging_1_0_3.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__commons_net_commons_net_1_4_1.xml b/.idea/libraries/Maven__commons_net_commons_net_1_4_1.xml new file mode 100644 index 0000000..af5d657 --- /dev/null +++ b/.idea/libraries/Maven__commons_net_commons_net_1_4_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff 
--git a/.idea/libraries/Maven__javax_activation_activation_1_1.xml b/.idea/libraries/Maven__javax_activation_activation_1_1.xml new file mode 100644 index 0000000..77943ef --- /dev/null +++ b/.idea/libraries/Maven__javax_activation_activation_1_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__javax_xml_bind_jaxb_api_2_2_2.xml b/.idea/libraries/Maven__javax_xml_bind_jaxb_api_2_2_2.xml new file mode 100644 index 0000000..9736ebe --- /dev/null +++ b/.idea/libraries/Maven__javax_xml_bind_jaxb_api_2_2_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__javax_xml_stream_stax_api_1_0_2.xml b/.idea/libraries/Maven__javax_xml_stream_stax_api_1_0_2.xml new file mode 100644 index 0000000..313dd9c --- /dev/null +++ b/.idea/libraries/Maven__javax_xml_stream_stax_api_1_0_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__junit_junit_4_7.xml b/.idea/libraries/Maven__junit_junit_4_7.xml new file mode 100644 index 0000000..539774b --- /dev/null +++ b/.idea/libraries/Maven__junit_junit_4_7.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_commons_commons_lang3_3_1.xml b/.idea/libraries/Maven__org_apache_commons_commons_lang3_3_1.xml new file mode 100644 index 0000000..14fdf7e --- /dev/null +++ b/.idea/libraries/Maven__org_apache_commons_commons_lang3_3_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_commons_commons_math3_3_2.xml b/.idea/libraries/Maven__org_apache_commons_commons_math3_3_2.xml new file mode 100644 index 0000000..83879f6 --- /dev/null +++ b/.idea/libraries/Maven__org_apache_commons_commons_math3_3_2.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git 
a/.idea/libraries/Maven__org_apache_commons_commons_math_2_1.xml b/.idea/libraries/Maven__org_apache_commons_commons_math_2_1.xml new file mode 100644 index 0000000..2f660c7 --- /dev/null +++ b/.idea/libraries/Maven__org_apache_commons_commons_math_2_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_hadoop_hadoop_core_1_2_1.xml b/.idea/libraries/Maven__org_apache_hadoop_hadoop_core_1_2_1.xml new file mode 100644 index 0000000..f602a4b --- /dev/null +++ b/.idea/libraries/Maven__org_apache_hadoop_hadoop_core_1_2_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_lucene_lucene_analyzers_common_4_6_1.xml b/.idea/libraries/Maven__org_apache_lucene_lucene_analyzers_common_4_6_1.xml new file mode 100644 index 0000000..731321f --- /dev/null +++ b/.idea/libraries/Maven__org_apache_lucene_lucene_analyzers_common_4_6_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_lucene_lucene_core_4_6_1.xml b/.idea/libraries/Maven__org_apache_lucene_lucene_core_4_6_1.xml new file mode 100644 index 0000000..404fb0c --- /dev/null +++ b/.idea/libraries/Maven__org_apache_lucene_lucene_core_4_6_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_mahout_commons_commons_cli_2_0_mahout.xml b/.idea/libraries/Maven__org_apache_mahout_commons_commons_cli_2_0_mahout.xml new file mode 100644 index 0000000..05c123c --- /dev/null +++ b/.idea/libraries/Maven__org_apache_mahout_commons_commons_cli_2_0_mahout.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_mahout_mahout_core_0_9.xml b/.idea/libraries/Maven__org_apache_mahout_mahout_core_0_9.xml new file mode 100644 index 0000000..5e9f7aa --- /dev/null +++ 
b/.idea/libraries/Maven__org_apache_mahout_mahout_core_0_9.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_mahout_mahout_math_0_9.xml b/.idea/libraries/Maven__org_apache_mahout_mahout_math_0_9.xml new file mode 100644 index 0000000..c6c295a --- /dev/null +++ b/.idea/libraries/Maven__org_apache_mahout_mahout_math_0_9.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_apache_solr_solr_commons_csv_3_5_0.xml b/.idea/libraries/Maven__org_apache_solr_solr_commons_csv_3_5_0.xml new file mode 100644 index 0000000..f20e580 --- /dev/null +++ b/.idea/libraries/Maven__org_apache_solr_solr_commons_csv_3_5_0.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_codehaus_jackson_jackson_core_asl_1_9_12.xml b/.idea/libraries/Maven__org_codehaus_jackson_jackson_core_asl_1_9_12.xml new file mode 100644 index 0000000..1da4e54 --- /dev/null +++ b/.idea/libraries/Maven__org_codehaus_jackson_jackson_core_asl_1_9_12.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_codehaus_jackson_jackson_jaxrs_1_7_1.xml b/.idea/libraries/Maven__org_codehaus_jackson_jackson_jaxrs_1_7_1.xml new file mode 100644 index 0000000..c4f147f --- /dev/null +++ b/.idea/libraries/Maven__org_codehaus_jackson_jackson_jaxrs_1_7_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_codehaus_jackson_jackson_mapper_asl_1_9_12.xml b/.idea/libraries/Maven__org_codehaus_jackson_jackson_mapper_asl_1_9_12.xml new file mode 100644 index 0000000..b34f218 --- /dev/null +++ b/.idea/libraries/Maven__org_codehaus_jackson_jackson_mapper_asl_1_9_12.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_codehaus_jackson_jackson_xc_1_7_1.xml 
b/.idea/libraries/Maven__org_codehaus_jackson_jackson_xc_1_7_1.xml new file mode 100644 index 0000000..a0c939e --- /dev/null +++ b/.idea/libraries/Maven__org_codehaus_jackson_jackson_xc_1_7_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_codehaus_jettison_jettison_1_1.xml b/.idea/libraries/Maven__org_codehaus_jettison_jettison_1_1.xml new file mode 100644 index 0000000..2648d1d --- /dev/null +++ b/.idea/libraries/Maven__org_codehaus_jettison_jettison_1_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__org_slf4j_slf4j_api_1_7_5.xml b/.idea/libraries/Maven__org_slf4j_slf4j_api_1_7_5.xml new file mode 100644 index 0000000..255eecc --- /dev/null +++ b/.idea/libraries/Maven__org_slf4j_slf4j_api_1_7_5.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__stax_stax_api_1_0_1.xml b/.idea/libraries/Maven__stax_stax_api_1_0_1.xml new file mode 100644 index 0000000..4de7af3 --- /dev/null +++ b/.idea/libraries/Maven__stax_stax_api_1_0_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__xmlpull_xmlpull_1_1_3_1.xml b/.idea/libraries/Maven__xmlpull_xmlpull_1_1_3_1.xml new file mode 100644 index 0000000..da17a2d --- /dev/null +++ b/.idea/libraries/Maven__xmlpull_xmlpull_1_1_3_1.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/libraries/Maven__xpp3_xpp3_min_1_1_4c.xml b/.idea/libraries/Maven__xpp3_xpp3_min_1_1_4c.xml new file mode 100644 index 0000000..4d9db36 --- /dev/null +++ b/.idea/libraries/Maven__xpp3_xpp3_min_1_1_4c.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..bc76e50 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,11 @@ + + + + + + + \ No newline at end of file diff --git 
a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..615cef2 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..c8397c9 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/big-data.iml b/big-data.iml new file mode 100644 index 0000000..e66f4b8 --- /dev/null +++ b/big-data.iml @@ -0,0 +1,57 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/main/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java b/src/main/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java new file mode 100644 index 0000000..6374424 --- /dev/null +++ b/src/main/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java @@ -0,0 +1,227 @@
+package nearsoft.academy.bigdata.recommendation;
+
+import org.apache.mahout.cf.taste.common.NoSuchUserException;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
+import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
+import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
+import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.UserBasedRecommender;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+
+import java.io.*;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Hashtable;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * Builds user-based movie recommendations with Mahout from a gzip-compressed review dump.
+ *
+ * <p>The constructor reads the input once, line by line, and only looks at lines of the form
+ * {@code product/productId: X}, {@code review/userId: X} and {@code review/score: X}. Every
+ * distinct product and user string gets a sequential numeric id starting at 1. As soon as a
+ * product, a user and a score have all been seen, a {@code userId,productId,score} row is
+ * written to {@code fileForRecommendations.csv} in the working directory and the three values
+ * are reset. That CSV is the data model handed to Mahout when recommendations are requested.
+ */
+public class MovieRecommender {
+
+    /** CSV written while parsing and read back when recommending (relative to the working directory). */
+    private static final String RECOMMENDATIONS_FILE = "fileForRecommendations.csv";
+
+    // Compiled once per class rather than on every parse; the expressions are unchanged.
+    private static final Pattern PRODUCT_REGEX = Pattern.compile("product\\/productId: ([\\D\\d]+)");
+    private static final Pattern USER_REGEX = Pattern.compile("review\\/userId: ([\\D\\d]+)");
+    private static final Pattern SCORE_REGEX = Pattern.compile("review\\/score: ([\\D\\d]+)");
+
+    private final String pathGzFile; // path of the gz file
+    private int totalReviews; // number of review/userId lines seen
+    private int totalProducts; // number of distinct products seen
+    private int totalUsers; // number of distinct users seen
+
+    Hashtable<String, Integer> products; // product string -> numeric id (1-based)
+    Hashtable<String, Integer> users; // user string -> numeric id (1-based)
+    String currentUser = ""; // user of the review being parsed, "" until seen
+    String currentProduct = ""; // product of the review being parsed, "" until seen
+    String currentScore = ""; // score of the review being parsed, "" until seen
+
+    // Reverse of products: element i is the product string whose numeric id is i + 1.
+    private final List<String> productIdsByIndex = new ArrayList<>();
+
+    /**
+     * Parses the review dump at {@code path} and writes the CSV used for recommendations.
+     *
+     * @param path path of the gzip-compressed review file
+     * @throws UncheckedIOException if the input cannot be read or the CSV cannot be written
+     */
+    public MovieRecommender(String path) {
+        this.pathGzFile = path;
+        this.products = new Hashtable<>();
+        this.users = new Hashtable<>();
+        try {
+            readFile();
+        } catch (IOException e) {
+            // Fail loudly: printing the trace and continuing would hand callers an instance
+            // whose counters and id tables are silently incomplete.
+            throw new UncheckedIOException("Could not process review file " + path, e);
+        }
+    }
+
+    /** Clears the per-review state once a CSV row has been written. */
+    private void restartVariables() {
+        currentUser = "";
+        currentProduct = "";
+        currentScore = "";
+    }
+
+    /**
+     * Streams the gzip input line by line, updating the id tables and counters and writing a
+     * CSV row whenever a review is complete.
+     *
+     * @throws IOException if the input cannot be opened or read, or the CSV cannot be written
+     */
+    private void readFile() throws IOException {
+        // Every stream is its own resource so all of them are closed even when a later
+        // constructor (e.g. GZIPInputStream on a non-gzip file) or the loop body throws.
+        // The charset is explicit so parsing does not depend on the platform default.
+        // NOTE(review): assumes the extracted ids and scores are plain ASCII - confirm.
+        try (InputStream file = new FileInputStream(this.pathGzFile);
+             InputStream gzipInputStream = new GZIPInputStream(file);
+             BufferedReader in = new BufferedReader(
+                     new InputStreamReader(gzipInputStream, StandardCharsets.UTF_8));
+             BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
+                     new FileOutputStream(RECOMMENDATIONS_FILE), StandardCharsets.UTF_8))) {
+            String line;
+            while ((line = in.readLine()) != null) {
+                // The per-review state only changes on a recognised line, so only then can a
+                // row have become complete.
+                if (parseLine(line)) {
+                    writeToFileForRecommendations(writer);
+                }
+            }
+        }
+    }
+
+    /**
+     * Updates the per-review state, id tables and counters from one input line.
+     *
+     * @param line one line of the review dump
+     * @return {@code true} if the line was a product, user or score line
+     */
+    private boolean parseLine(String line) {
+        String product = capture(PRODUCT_REGEX, line);
+        if (product != null) {
+            currentProduct = product;
+            if (!products.containsKey(product)) {
+                totalProducts += 1;
+                products.put(product, totalProducts);
+                productIdsByIndex.add(product);
+            }
+            return true;
+        }
+        String user = capture(USER_REGEX, line);
+        if (user != null) {
+            currentUser = user;
+            if (!users.containsKey(user)) {
+                totalUsers += 1;
+                users.put(user, totalUsers);
+            }
+            totalReviews += 1; // reviews are counted by their review/userId line
+            return true;
+        }
+        String score = capture(SCORE_REGEX, line);
+        if (score != null) {
+            currentScore = score;
+            return true;
+        }
+        return false;
+    }
+
+    /** Returns group 1 if {@code pattern} matches the whole line, otherwise {@code null}. */
+    private static String capture(Pattern pattern, String line) {
+        Matcher matcher = pattern.matcher(line);
+        return matcher.matches() ? matcher.group(1) : null;
+    }
+
+    /**
+     * Writes one {@code userId,productId,score} row and resets the per-review state, but only
+     * once a product, a user and a score have all been seen.
+     *
+     * @param writer destination CSV writer
+     * @throws IOException if the row cannot be written
+     */
+    private void writeToFileForRecommendations(BufferedWriter writer) throws IOException {
+        if (currentProduct.isEmpty() || currentScore.isEmpty() || currentUser.isEmpty()) {
+            return;
+        }
+        int idUserForRecommender = users.get(currentUser);
+        int idProductForRecommender = products.get(currentProduct);
+        writer.write(idUserForRecommender + "," + idProductForRecommender + "," + currentScore + "\n");
+        restartVariables();
+    }
+
+    /**
+     * Maps a numeric product id back to the original product string.
+     *
+     * @param value numeric product id (1-based)
+     * @return the product string, or {@code null} if no product has that id
+     */
+    private String getProductId(int value) {
+        if (value < 1 || value > productIdsByIndex.size()) {
+            return null;
+        }
+        return productIdsByIndex.get(value - 1);
+    }
+
+    /** Returns the number of {@code review/userId} lines counted while parsing. */
+    public int getTotalReviews() {
+        return totalReviews;
+    }
+
+    /** Returns the number of distinct products seen while parsing. */
+    public int getTotalProducts() {
+        return totalProducts;
+    }
+
+    /** Returns the number of distinct users seen while parsing. */
+    public int getTotalUsers() {
+        return totalUsers;
+    }
+
+    /**
+     * Recommends up to three products for a user with a Pearson-correlation, threshold-
+     * neighbourhood (0.1) user-based recommender built from the CSV written by the constructor.
+     *
+     * @param idUser user string as it appears in the review dump
+     * @return product strings of the recommended items, in the order returned by Mahout
+     * @throws IOException if the CSV cannot be read
+     * @throws TasteException if Mahout fails; a {@link NoSuchUserException} if {@code idUser}
+     *     did not occur in the review dump
+     */
+    public List<String> getRecommendationsForUser(String idUser) throws IOException, TasteException {
+        // Look the id up as an Integer: unboxing a missing entry straight into an int would
+        // surface as a bare NullPointerException.
+        Integer userId = idUser == null ? null : users.get(idUser);
+        if (userId == null) {
+            throw new NoSuchUserException("Unknown user id: " + idUser);
+        }
+        DataModel model = new FileDataModel(new File(RECOMMENDATIONS_FILE));
+        UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
+        UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model);
+        UserBasedRecommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity);
+        List<RecommendedItem> recommendations = recommender.recommend(userId, 3);
+        List<String> recommendationsOutPut = new ArrayList<>(recommendations.size());
+        for (RecommendedItem recommendation : recommendations) {
+            // Item ids in the CSV are the int ids assigned while parsing, so the cast is lossless.
+            recommendationsOutPut.add(getProductId((int) recommendation.getItemID()));
+        }
+        return recommendationsOutPut;
+    }
+}
\ No newline at end
of file diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java index 0d0b1fe..fe7874d 100644 --- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java +++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java @@ -15,7 +15,7 @@ public class MovieRecommenderTest { public void testDataInfo() throws IOException, TasteException { //download movies.txt.gz from // http://snap.stanford.edu/data/web-Movies.html - MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz"); + MovieRecommender recommender = new MovieRecommender("movies.txt.gz"); assertEquals(7911684, recommender.getTotalReviews()); assertEquals(253059, recommender.getTotalProducts()); assertEquals(889176, recommender.getTotalUsers()); diff --git a/target/classes/nearsoft/academy/bigdata/recommendation/MovieRecommender.class b/target/classes/nearsoft/academy/bigdata/recommendation/MovieRecommender.class new file mode 100644 index 0000000..7bbdd11 Binary files /dev/null and b/target/classes/nearsoft/academy/bigdata/recommendation/MovieRecommender.class differ diff --git a/target/test-classes/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.class b/target/test-classes/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.class new file mode 100644 index 0000000..d66a234 Binary files /dev/null and b/target/test-classes/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.class differ