diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..121f323
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
+# Editor-based HTTP Client requests
+/httpRequests/
diff --git a/.idea/.name b/.idea/.name
new file mode 100644
index 0000000..a6f69a2
--- /dev/null
+++ b/.idea/.name
@@ -0,0 +1 @@
+big-data
\ No newline at end of file
diff --git a/.idea/compiler.xml b/.idea/compiler.xml
new file mode 100644
index 0000000..a95a2ab
--- /dev/null
+++ b/.idea/compiler.xml
@@ -0,0 +1,16 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/encodings.xml b/.idea/encodings.xml
new file mode 100644
index 0000000..4140949
--- /dev/null
+++ b/.idea/encodings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/jarRepositories.xml b/.idea/jarRepositories.xml
new file mode 100644
index 0000000..a468a99
--- /dev/null
+++ b/.idea/jarRepositories.xml
@@ -0,0 +1,20 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__asm_asm_3_1.xml b/.idea/libraries/Maven__asm_asm_3_1.xml
new file mode 100644
index 0000000..50a4cd4
--- /dev/null
+++ b/.idea/libraries/Maven__asm_asm_3_1.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__com_google_guava_guava_16_0.xml b/.idea/libraries/Maven__com_google_guava_guava_16_0.xml
new file mode 100644
index 0000000..3ecdbf4
--- /dev/null
+++ b/.idea/libraries/Maven__com_google_guava_guava_16_0.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__com_sun_jersey_jersey_core_1_8.xml b/.idea/libraries/Maven__com_sun_jersey_jersey_core_1_8.xml
new file mode 100644
index 0000000..05b19db
--- /dev/null
+++ b/.idea/libraries/Maven__com_sun_jersey_jersey_core_1_8.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__com_sun_jersey_jersey_json_1_8.xml b/.idea/libraries/Maven__com_sun_jersey_jersey_json_1_8.xml
new file mode 100644
index 0000000..8c3e9ee
--- /dev/null
+++ b/.idea/libraries/Maven__com_sun_jersey_jersey_json_1_8.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__com_sun_jersey_jersey_server_1_8.xml b/.idea/libraries/Maven__com_sun_jersey_jersey_server_1_8.xml
new file mode 100644
index 0000000..08eb4d0
--- /dev/null
+++ b/.idea/libraries/Maven__com_sun_jersey_jersey_server_1_8.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__com_sun_xml_bind_jaxb_impl_2_2_3_1.xml b/.idea/libraries/Maven__com_sun_xml_bind_jaxb_impl_2_2_3_1.xml
new file mode 100644
index 0000000..6b533c5
--- /dev/null
+++ b/.idea/libraries/Maven__com_sun_xml_bind_jaxb_impl_2_2_3_1.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__com_thoughtworks_xstream_xstream_1_4_4.xml b/.idea/libraries/Maven__com_thoughtworks_xstream_xstream_1_4_4.xml
new file mode 100644
index 0000000..67ef426
--- /dev/null
+++ b/.idea/libraries/Maven__com_thoughtworks_xstream_xstream_1_4_4.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__commons_beanutils_commons_beanutils_1_7_0.xml b/.idea/libraries/Maven__commons_beanutils_commons_beanutils_1_7_0.xml
new file mode 100644
index 0000000..e391de3
--- /dev/null
+++ b/.idea/libraries/Maven__commons_beanutils_commons_beanutils_1_7_0.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__commons_beanutils_commons_beanutils_core_1_8_0.xml b/.idea/libraries/Maven__commons_beanutils_commons_beanutils_core_1_8_0.xml
new file mode 100644
index 0000000..acd442c
--- /dev/null
+++ b/.idea/libraries/Maven__commons_beanutils_commons_beanutils_core_1_8_0.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__commons_cli_commons_cli_1_2.xml b/.idea/libraries/Maven__commons_cli_commons_cli_1_2.xml
new file mode 100644
index 0000000..5012ae2
--- /dev/null
+++ b/.idea/libraries/Maven__commons_cli_commons_cli_1_2.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__commons_codec_commons_codec_1_4.xml b/.idea/libraries/Maven__commons_codec_commons_codec_1_4.xml
new file mode 100644
index 0000000..6125f1c
--- /dev/null
+++ b/.idea/libraries/Maven__commons_codec_commons_codec_1_4.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__commons_collections_commons_collections_3_2_1.xml b/.idea/libraries/Maven__commons_collections_commons_collections_3_2_1.xml
new file mode 100644
index 0000000..ae2764e
--- /dev/null
+++ b/.idea/libraries/Maven__commons_collections_commons_collections_3_2_1.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__commons_configuration_commons_configuration_1_6.xml b/.idea/libraries/Maven__commons_configuration_commons_configuration_1_6.xml
new file mode 100644
index 0000000..6081aef
--- /dev/null
+++ b/.idea/libraries/Maven__commons_configuration_commons_configuration_1_6.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__commons_digester_commons_digester_1_8.xml b/.idea/libraries/Maven__commons_digester_commons_digester_1_8.xml
new file mode 100644
index 0000000..c33d92f
--- /dev/null
+++ b/.idea/libraries/Maven__commons_digester_commons_digester_1_8.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__commons_el_commons_el_1_0.xml b/.idea/libraries/Maven__commons_el_commons_el_1_0.xml
new file mode 100644
index 0000000..28d81bc
--- /dev/null
+++ b/.idea/libraries/Maven__commons_el_commons_el_1_0.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__commons_httpclient_commons_httpclient_3_0_1.xml b/.idea/libraries/Maven__commons_httpclient_commons_httpclient_3_0_1.xml
new file mode 100644
index 0000000..7253720
--- /dev/null
+++ b/.idea/libraries/Maven__commons_httpclient_commons_httpclient_3_0_1.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__commons_io_commons_io_2_1.xml b/.idea/libraries/Maven__commons_io_commons_io_2_1.xml
new file mode 100644
index 0000000..6c877dd
--- /dev/null
+++ b/.idea/libraries/Maven__commons_io_commons_io_2_1.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__commons_lang_commons_lang_2_4.xml b/.idea/libraries/Maven__commons_lang_commons_lang_2_4.xml
new file mode 100644
index 0000000..434851f
--- /dev/null
+++ b/.idea/libraries/Maven__commons_lang_commons_lang_2_4.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__commons_logging_commons_logging_1_0_3.xml b/.idea/libraries/Maven__commons_logging_commons_logging_1_0_3.xml
new file mode 100644
index 0000000..66bfbba
--- /dev/null
+++ b/.idea/libraries/Maven__commons_logging_commons_logging_1_0_3.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__commons_net_commons_net_1_4_1.xml b/.idea/libraries/Maven__commons_net_commons_net_1_4_1.xml
new file mode 100644
index 0000000..af5d657
--- /dev/null
+++ b/.idea/libraries/Maven__commons_net_commons_net_1_4_1.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__javax_activation_activation_1_1.xml b/.idea/libraries/Maven__javax_activation_activation_1_1.xml
new file mode 100644
index 0000000..77943ef
--- /dev/null
+++ b/.idea/libraries/Maven__javax_activation_activation_1_1.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__javax_xml_bind_jaxb_api_2_2_2.xml b/.idea/libraries/Maven__javax_xml_bind_jaxb_api_2_2_2.xml
new file mode 100644
index 0000000..9736ebe
--- /dev/null
+++ b/.idea/libraries/Maven__javax_xml_bind_jaxb_api_2_2_2.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__javax_xml_stream_stax_api_1_0_2.xml b/.idea/libraries/Maven__javax_xml_stream_stax_api_1_0_2.xml
new file mode 100644
index 0000000..313dd9c
--- /dev/null
+++ b/.idea/libraries/Maven__javax_xml_stream_stax_api_1_0_2.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__junit_junit_4_7.xml b/.idea/libraries/Maven__junit_junit_4_7.xml
new file mode 100644
index 0000000..539774b
--- /dev/null
+++ b/.idea/libraries/Maven__junit_junit_4_7.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_apache_commons_commons_lang3_3_1.xml b/.idea/libraries/Maven__org_apache_commons_commons_lang3_3_1.xml
new file mode 100644
index 0000000..14fdf7e
--- /dev/null
+++ b/.idea/libraries/Maven__org_apache_commons_commons_lang3_3_1.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_apache_commons_commons_math3_3_2.xml b/.idea/libraries/Maven__org_apache_commons_commons_math3_3_2.xml
new file mode 100644
index 0000000..83879f6
--- /dev/null
+++ b/.idea/libraries/Maven__org_apache_commons_commons_math3_3_2.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_apache_commons_commons_math_2_1.xml b/.idea/libraries/Maven__org_apache_commons_commons_math_2_1.xml
new file mode 100644
index 0000000..2f660c7
--- /dev/null
+++ b/.idea/libraries/Maven__org_apache_commons_commons_math_2_1.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_apache_hadoop_hadoop_core_1_2_1.xml b/.idea/libraries/Maven__org_apache_hadoop_hadoop_core_1_2_1.xml
new file mode 100644
index 0000000..f602a4b
--- /dev/null
+++ b/.idea/libraries/Maven__org_apache_hadoop_hadoop_core_1_2_1.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_apache_lucene_lucene_analyzers_common_4_6_1.xml b/.idea/libraries/Maven__org_apache_lucene_lucene_analyzers_common_4_6_1.xml
new file mode 100644
index 0000000..731321f
--- /dev/null
+++ b/.idea/libraries/Maven__org_apache_lucene_lucene_analyzers_common_4_6_1.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_apache_lucene_lucene_core_4_6_1.xml b/.idea/libraries/Maven__org_apache_lucene_lucene_core_4_6_1.xml
new file mode 100644
index 0000000..404fb0c
--- /dev/null
+++ b/.idea/libraries/Maven__org_apache_lucene_lucene_core_4_6_1.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_apache_mahout_commons_commons_cli_2_0_mahout.xml b/.idea/libraries/Maven__org_apache_mahout_commons_commons_cli_2_0_mahout.xml
new file mode 100644
index 0000000..05c123c
--- /dev/null
+++ b/.idea/libraries/Maven__org_apache_mahout_commons_commons_cli_2_0_mahout.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_apache_mahout_mahout_core_0_9.xml b/.idea/libraries/Maven__org_apache_mahout_mahout_core_0_9.xml
new file mode 100644
index 0000000..5e9f7aa
--- /dev/null
+++ b/.idea/libraries/Maven__org_apache_mahout_mahout_core_0_9.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_apache_mahout_mahout_math_0_9.xml b/.idea/libraries/Maven__org_apache_mahout_mahout_math_0_9.xml
new file mode 100644
index 0000000..c6c295a
--- /dev/null
+++ b/.idea/libraries/Maven__org_apache_mahout_mahout_math_0_9.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_apache_solr_solr_commons_csv_3_5_0.xml b/.idea/libraries/Maven__org_apache_solr_solr_commons_csv_3_5_0.xml
new file mode 100644
index 0000000..f20e580
--- /dev/null
+++ b/.idea/libraries/Maven__org_apache_solr_solr_commons_csv_3_5_0.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_codehaus_jackson_jackson_core_asl_1_9_12.xml b/.idea/libraries/Maven__org_codehaus_jackson_jackson_core_asl_1_9_12.xml
new file mode 100644
index 0000000..1da4e54
--- /dev/null
+++ b/.idea/libraries/Maven__org_codehaus_jackson_jackson_core_asl_1_9_12.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_codehaus_jackson_jackson_jaxrs_1_7_1.xml b/.idea/libraries/Maven__org_codehaus_jackson_jackson_jaxrs_1_7_1.xml
new file mode 100644
index 0000000..c4f147f
--- /dev/null
+++ b/.idea/libraries/Maven__org_codehaus_jackson_jackson_jaxrs_1_7_1.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_codehaus_jackson_jackson_mapper_asl_1_9_12.xml b/.idea/libraries/Maven__org_codehaus_jackson_jackson_mapper_asl_1_9_12.xml
new file mode 100644
index 0000000..b34f218
--- /dev/null
+++ b/.idea/libraries/Maven__org_codehaus_jackson_jackson_mapper_asl_1_9_12.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_codehaus_jackson_jackson_xc_1_7_1.xml b/.idea/libraries/Maven__org_codehaus_jackson_jackson_xc_1_7_1.xml
new file mode 100644
index 0000000..a0c939e
--- /dev/null
+++ b/.idea/libraries/Maven__org_codehaus_jackson_jackson_xc_1_7_1.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_codehaus_jettison_jettison_1_1.xml b/.idea/libraries/Maven__org_codehaus_jettison_jettison_1_1.xml
new file mode 100644
index 0000000..2648d1d
--- /dev/null
+++ b/.idea/libraries/Maven__org_codehaus_jettison_jettison_1_1.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__org_slf4j_slf4j_api_1_7_5.xml b/.idea/libraries/Maven__org_slf4j_slf4j_api_1_7_5.xml
new file mode 100644
index 0000000..255eecc
--- /dev/null
+++ b/.idea/libraries/Maven__org_slf4j_slf4j_api_1_7_5.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__stax_stax_api_1_0_1.xml b/.idea/libraries/Maven__stax_stax_api_1_0_1.xml
new file mode 100644
index 0000000..4de7af3
--- /dev/null
+++ b/.idea/libraries/Maven__stax_stax_api_1_0_1.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__xmlpull_xmlpull_1_1_3_1.xml b/.idea/libraries/Maven__xmlpull_xmlpull_1_1_3_1.xml
new file mode 100644
index 0000000..da17a2d
--- /dev/null
+++ b/.idea/libraries/Maven__xmlpull_xmlpull_1_1_3_1.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/libraries/Maven__xpp3_xpp3_min_1_1_4c.xml b/.idea/libraries/Maven__xpp3_xpp3_min_1_1_4c.xml
new file mode 100644
index 0000000..4d9db36
--- /dev/null
+++ b/.idea/libraries/Maven__xpp3_xpp3_min_1_1_4c.xml
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..bc76e50
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,11 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..615cef2
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..c8397c9
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/big-data.iml b/big-data.iml
new file mode 100644
index 0000000..e66f4b8
--- /dev/null
+++ b/big-data.iml
@@ -0,0 +1,57 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/main/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java b/src/main/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java
new file mode 100644
index 0000000..6374424
--- /dev/null
+++ b/src/main/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java
@@ -0,0 +1,143 @@
+package nearsoft.academy.bigdata.recommendation;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
+import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
+import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
+import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.UserBasedRecommender;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+
+import java.io.*;
+import java.util.ArrayList;
+import java.util.Hashtable;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.zip.GZIPInputStream;
+
+public class MovieRecommender {
+
+ private final String pathGzFile; // path of the gz file
+ private int totalReviews; // integer for total of reviews
+ private int totalProducts; // integer for total of products
+ private int totalUsers; // integer for total of users
+
+ Hashtable products; // Hashtable for products
+ Hashtable users; // Hashtable for users
+ String currentUser = ""; // string for current user, to add it to the csv file
+ String currentProduct = ""; // string for current product, to add it to the csv file
+ String currentScore = ""; // string for current score, to add it to the csv file
+
+ public MovieRecommender(String path){
+ this.pathGzFile = path;
+ this.products = new Hashtable();
+ this.users = new Hashtable();
+ this.totalReviews = 0;
+ this.totalProducts = 0;
+ this.totalUsers = 0;
+ try {
+ this.readFile();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+
+ private void restartVariables(){
+ currentUser = "";
+ currentProduct = "";
+ currentScore = "";
+ }
+
+ private void readFile() throws IOException {
+ FileInputStream file = new FileInputStream(this.pathGzFile);
+ GZIPInputStream gzipInputStream = new GZIPInputStream(file);
+ Reader reader = new InputStreamReader(gzipInputStream);
+ BufferedReader in = new BufferedReader(reader);
+ String line;
+ File fileForRecommendations = new File("fileForRecommendations.csv");
+ FileWriter writerOfFile = new FileWriter(fileForRecommendations);
+ BufferedWriter writer = new BufferedWriter(writerOfFile);
+
+ Pattern productRegex = Pattern.compile("product\\/productId: ([\\D\\d]+)");
+ Pattern userRegex = Pattern.compile("review\\/userId: ([\\D\\d]+)");
+ Pattern scoreRegex = Pattern.compile("review\\/score: ([\\D\\d]+)");
+ Matcher matcher;
+ while ((line = in.readLine()) != null) {
+ matcher = productRegex.matcher(line);
+ if (matcher.matches()) {
+ currentProduct = matcher.group(1);
+ if (!products.containsKey(currentProduct)) {
+ totalProducts += 1;
+ products.put(currentProduct, totalProducts);
+ }
+ }
+ matcher = userRegex.matcher(line);
+ if (matcher.matches()){
+ currentUser = matcher.group(1);
+ if (!users.containsKey(currentUser)) {
+ totalUsers += 1;
+ users.put(currentUser, totalUsers);
+ }
+ totalReviews += 1;
+ }
+ matcher = scoreRegex.matcher(line);
+ if (matcher.matches()) {
+ currentScore = matcher.group(1);
+ }
+ writeToFileForRecommendations(writer);
+ }
+ writer.close();
+ in.close();
+ gzipInputStream.close();
+ }
+
+ private void writeToFileForRecommendations(BufferedWriter writer) throws IOException {
+ if (!currentProduct.equals("") && !currentScore.equals("") && !currentUser.equals("")) {
+ int idUserForRecommender = users.get(currentUser);
+ int idProductForRecommender = products.get(currentProduct);
+ writer.write(idUserForRecommender + "," + idProductForRecommender + "," + currentScore + "\n");
+ restartVariables();
+ }
+ }
+
+ private String getProductId(int value) {
+ for (String key : products.keySet()) {
+ if (products.get(key) == value) {
+ return key;
+ }
+ }
+ return null;
+ }
+
+ public int getTotalReviews(){
+ return totalReviews;
+ }
+
+ public int getTotalProducts(){
+ return totalProducts;
+ }
+
+ public int getTotalUsers(){
+ return totalUsers;
+ }
+
+ public List getRecommendationsForUser(String idUser) throws IOException, TasteException {
+ int userId = users.get(idUser);
+ DataModel model = new FileDataModel(new File("./fileForRecommendations.csv"));
+ UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
+ UserNeighborhood neighborhood = new ThresholdUserNeighborhood(0.1, similarity, model);
+ UserBasedRecommender recommender = new GenericUserBasedRecommender(model, neighborhood, similarity);
+ List recommendations = recommender.recommend(userId, 3);
+ List recommendationsOutPut = new ArrayList();
+ for (RecommendedItem recommendation : recommendations) {
+ int value = (int)recommendation.getItemID();
+ String productIdRecommendation = getProductId(value);
+ recommendationsOutPut.add(productIdRecommendation);
+ }
+ return recommendationsOutPut;
+ }
+}
\ No newline at end of file
diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java
index 0d0b1fe..fe7874d 100644
--- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java
+++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java
@@ -15,7 +15,7 @@ public class MovieRecommenderTest {
public void testDataInfo() throws IOException, TasteException {
//download movies.txt.gz from
// http://snap.stanford.edu/data/web-Movies.html
- MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz");
+ MovieRecommender recommender = new MovieRecommender("movies.txt.gz");
assertEquals(7911684, recommender.getTotalReviews());
assertEquals(253059, recommender.getTotalProducts());
assertEquals(889176, recommender.getTotalUsers());
diff --git a/target/classes/nearsoft/academy/bigdata/recommendation/MovieRecommender.class b/target/classes/nearsoft/academy/bigdata/recommendation/MovieRecommender.class
new file mode 100644
index 0000000..7bbdd11
Binary files /dev/null and b/target/classes/nearsoft/academy/bigdata/recommendation/MovieRecommender.class differ
diff --git a/target/test-classes/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.class b/target/test-classes/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.class
new file mode 100644
index 0000000..d66a234
Binary files /dev/null and b/target/test-classes/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.class differ