Aloja · alejandromontero · Sep 14, 2017 · fenech · Sep 18, 2017 · alejandromontero
diff --git a/aloja-bench/run_benchs.sh b/aloja-bench/run_benchs.sh
@@ -32,16 +32,14 @@ prepare_folder "$DISK"
 # Save globals at the beginning (for debugging purposes)
 save_env "$JOB_PATH/config.sh"
 
-# Check if needed to download files and configs
-install_files
+# Prepare benchmark configuration. NOTE(review): this now runs before update_traps below, so an interrupt during setup may skip benchmark_suite_cleanup - confirm this is intended
+benchmark_suite_config
 
 # 3.) Run the benchmarks
 
 # At this point, if the user presses ctrl+c or the script is killed to clean up afterwards and copy the files if remote is defined
 update_traps "benchmark_suite_cleanup; rsync_extenal '$JOB_NAME';" "update_logger"
 
-benchmark_suite_config
-
 start_time=$(date '+%s')
 
 ########################################################

diff --git a/shell/common/benchmark_BigBench.sh b/shell/common/benchmark_BigBench.sh
@@ -18,31 +18,22 @@ if [ ! $user_suplied_bench_list ]; then
 fi
 
 benchmark_suite_config() {
-  initialize_hadoop_vars
-  prepare_hadoop_config "$NET" "$DISK" "$BENCH_SUITE"
-  start_hadoop
+  add_engine "hadoop" # Engines registered here are sourced and initialized by benchmark_suite_start_config below
+  add_engine "hive" # NOTE(review): Derby and Tez are no longer set up in this function - confirm they are handled elsewhere
 
-  if [ "$BB_SERVER_DERBY" == "true" ]; then
-    logger "WARNING: Using Derby DB in client/server mode"
-    USE_EXTERNAL_DATABASE="true"
-    initialize_derby_vars "BigBench_DB"
-    start_derby
-  else
-    logger "WARNING: Using Derby DB in embedded mode"
-  fi
+  # BigBench uses Spark under specific circumstances: spark-SQL as the SQL engine or spark-MLlib as the machine learning framework
+  if [ "$ENGINE" == "spark_sql" ] || [ "$HIVE_ML_FRAMEWORK" == "spark" ] || [ "$HIVE_ML_FRAMEWORK" == "spark-csv" ] \
+  || [ "$HIVE_ML_FRAMEWORK" == "spark-2" ]; then
 
-  initialize_hive_vars
-  prepare_hive_config "$HIVE_SETTINGS_FILE" "$HIVE_SETTINGS_FILE_PATH"
+    add_engine "spark"
 
-  if [ ! -z "$use_spark" ]; then
-    initialize_spark_vars
-    prepare_spark_config
+    if [ "$HIVE_ML_FRAMEWORK" == "spark-2" ]; then
+      logger "WARNING: Using spark 2 as SQL engine and Machine Learning framework"
+      SPARK_HIVE="spark_hive-2.1.1" # assigned before benchmark_suite_start_config runs the engine setup
+    fi
   fi
 
-  if [ "$HIVE_ENGINE" == "tez" ]; then
-    initialize_tez_vars
-    prepare_tez_config
-fi
+  benchmark_suite_start_config # shared setup defined in common_benchmarks.sh
   initialize_BigBench_vars
   prepare_BigBench
 }

diff --git a/shell/common/common_BigBench.sh b/shell/common/common_BigBench.sh
@@ -1,31 +1,3 @@
-# Start Spark if needed
-if [ "$ENGINE" == "spark_sql" ] || [ "$HIVE_ML_FRAMEWORK" == "spark" ] || [ "$HIVE_ML_FRAMEWORK" == "spark-csv" ] || [ "$HIVE_ML_FRAMEWORK" == "spark-2" ]; then
-
-  if [ "$HIVE_ML_FRAMEWORK" == "spark-2" ]; then
-    logger "WARNING: Using spark 2 as SQL engine and Machine Learning framework"
-    SPARK_HIVE="spark_hive-2.1.1"
-  fi
-  use_spark=true
-  source_file "$ALOJA_REPO_PATH/shell/common/common_spark.sh"
-  set_spark_requires
-#  HIVE_ENGINE="mr"
-fi
-
-# Start Hive
-source_file "$ALOJA_REPO_PATH/shell/common/common_hive.sh"
-set_hive_requires
-
-# Start Tez if needed
-if [ "$HIVE_ENGINE" == "tez" ]; then
-  source_file "$ALOJA_REPO_PATH/shell/common/common_tez.sh"
-  set_tez_requires
-fi
-
-if [ "$BB_SERVER_DERBY" == "true" ]; then
-  source_file "$ALOJA_REPO_PATH/shell/common/common_derby.sh"
-  set_derby_requires
-fi
-
 BIG_BENCH_FOLDER="Big-Data-Benchmark-for-Big-Bench"
 
 if [ "$BENCH_SCALE_FACTOR" == 0 ] ; then #Should only happen when BENCH_SCALE_FACTOR is not set and BENCH_DATA_SIZE < 1GB

diff --git a/shell/common/common_benchmarks.sh b/shell/common/common_benchmarks.sh
@@ -162,11 +162,39 @@ get_options() {
 
 }
 
+add_engine(){ bench_engines+=("$1"); } # Register engine $1 (e.g. "hadoop") in the bench_engines list
+
+source_engines(){ # Source common_<engine>.sh and call set_<engine>_requires for every registered engine
+  local engine # loop variable; keep it out of the global scope
+  for engine in "${bench_engines[@]}"; do
+    source_file "$ALOJA_REPO_PATH/shell/common/common_$engine.sh"
+    function_call "set_${engine}_requires"
+  done
+}
+
+init_engines(){ # Call init_<engine> for every registered engine, in registration order
+  local engine # loop variable; keep it out of the global scope
+  for engine in "${bench_engines[@]}"; do
+    function_call "init_$engine"
+  done
+}
 
 # Temple functions, re implement in benchmark if needed
 
-benchmark_suite_config() {
-  logger "DEBUG: No specific ${FUNCNAME[0]} defined for $BENCH_SUITE"
+benchmark_suite_start_config() { # Shared suite setup. NOTE(review): replaces the default benchmark_suite_config template - confirm every suite now defines its own
+  logger "INFO: preparing configuration for $BENCH_SUITE"
+
+  # Source the registered engines so each one marks the files it needs downloaded
+  source_engines
+
+  # Check if needed to download files and configs (runs after source_engines has marked them)
+  install_files
+
+  # Configure each registered engine and start its services if needed
+  init_engines
+
+  # Specify which binaries to use for monitoring
+  set_monit_binaries
 }
 
 # Iterate the specified benchmarks in the suite
@@ -1281,9 +1309,6 @@ $($DSH "ls -lah '$HDD/../'; ls -lah '$HDD_TMP/../' " )
   else
     logger "DEBUG: Base dirs created successfully"
   fi
-
-  # specify which binaries to use for monitoring
-  set_monit_binaries
 }
 
 # Cleanup after a benchmark suite run, and before starting one

diff --git a/shell/common/common_derby.sh b/shell/common/common_derby.sh
@@ -1,6 +1,5 @@
 source_file "$ALOJA_REPO_PATH/shell/common/common_java.sh"
 set_java_requires
-
 # Sets the required files to download/copy
 set_derby_requires() {
   [ ! "$DERBY_VERSION" ] && die "No DERBY_VERSION specified"
@@ -117,6 +116,13 @@ initialize_derby_vars() {
   fi
 }
 
+init_derby() { # init_<engine> hook for Derby (init_engines dispatches to "init_$engine")
+  initialize_derby_vars "Aloja_DB" # NOTE(review): BigBench previously passed "BigBench_DB" here - confirm the new name is intended
+  logger "WARNING: Using Derby DB in client/server mode"
+  USE_EXTERNAL_DATABASE="true" # global flag; presumably read by other engines' configuration - verify
+  start_derby
+}
+
 clean_derby() {
   stop_derby
 }
diff --git a/shell/common/common_hadoop.sh b/shell/common/common_hadoop.sh
@@ -411,6 +411,14 @@ cp $HADOOP_CONF_DIR/* $JOB_PATH/conf_$node/" &
  fi
 }
 
+
+init_hadoop() { # init_<engine> hook for Hadoop: initialize variables, prepare the config, then start Hadoop
+  initialize_hadoop_vars
+  prepare_hadoop_config "$NET" "$DISK" "$BENCH_SUITE"
+  use_hadoop=1 # Control variable, useful only in certain benchmarks
+  start_hadoop
+}
+
 # Returns if Hadoop v1 or v2
 # $1 the hadoop string (optional, if not uses $HADOOP_VERSION)
 get_hadoop_major_version() {

diff --git a/shell/common/common_hive.sh b/shell/common/common_hive.sh
@@ -1,4 +1,3 @@
-#HIVE SPECIFIC FUNCTIONS
 source_file "$ALOJA_REPO_PATH/shell/common/common_hadoop.sh"
 set_hadoop_requires
 
@@ -117,6 +116,12 @@ initialize_hive_vars() {
   fi
 }
 
+init_hive() { # init_<engine> hook for Hive: initialize variables and prepare the Hive config
+  initialize_hive_vars
+  prepare_hive_config "$HIVE_SETTINGS_FILE" "$HIVE_SETTINGS_FILE_PATH" # same arguments the replaced BigBench call site passed
+  use_hive=1 # Control variable, useful only in certain benchmarks
+}
+
 get_hive_major_version() {
   local hive_string="$HIVE_VERSION"
   local major_version=""

diff --git a/shell/common/common_java.sh b/shell/common/common_java.sh
@@ -21,6 +21,10 @@ get_java_exports() {
   fi
 }
 
+init_java(){ # init_<engine> hook for Java
+  set_java_requires # NOTE(review): source_engines already calls set_<engine>_requires before install_files - confirm this later call is still needed
+}
+
 # Sets the JAVA_HOME for the benchmark
 # TODO this assumes you are in the head node and only sets it there, should finish export_var_path funct
 # also that it is run from the main run_bench.sh file

diff --git a/shell/common/common_spark.sh b/shell/common/common_spark.sh
@@ -1,7 +1,3 @@
-#SPARK SPECIFIC FUNCTIONS
-source_file "$ALOJA_REPO_PATH/shell/common/common_hadoop.sh"
-set_hadoop_requires
-
 # Sets the required files to download/copy
 set_spark_requires() {
   [ ! "$SPARK_VERSION" ] && die "No SPARK_VERSION specified"
@@ -103,6 +99,12 @@ execute_spark-sql(){
   execute_spark "$bench" "$cmd" "$time_exec" "spark-sql"
 }
 
+init_spark() { # init_<engine> hook for Spark: initialize variables and prepare the Spark config
+  initialize_spark_vars # NOTE(review): this file no longer sources common_hadoop.sh, so suites presumably must add_engine "hadoop" themselves - confirm
+  prepare_spark_config
+  use_spark=1 # Control variable, useful only in certain benchmarks
+}
+
 initialize_spark_vars() {
   if [ "$clusterType" == "PaaS" ]; then
     SPARK_HOME="/usr" ## TODO ONLY WORKING IN HDI

diff --git a/shell/common/common_tez.sh b/shell/common/common_tez.sh
@@ -38,6 +38,12 @@ initialize_tez_vars() {
   fi
 }
 
+init_tez() { # init_<engine> hook for Tez
+  set_tez_requires # NOTE(review): unlike init_hadoop/init_hive/init_spark, requires are set here; when reached via init_engines, install_files has already run - confirm this is needed
+  initialize_tez_vars
+  prepare_tez_config
+}
+
 # Sets the substitution values for the tez config
 get_tez_substitutions() {
 

diff --git a/shell/conf/benchmarks_defaults.conf b/shell/conf/benchmarks_defaults.conf
@@ -85,7 +85,7 @@ BENCH_SCALE_FACTOR="$(( BENCH_DATA_SIZE / 1000000000 ))" #in GB
 [ ! "$HIVE_FILEFORMAT" ] && HIVE_FILEFORMAT="ORC" # Available options are: EXTFILE, RCFILE, ORC(default), SEQUENCEFILE, PARQUET, AVRO,
 [ ! "$HIVE_ENGINE" ] && HIVE_ENGINE="tez" # Available options are: tez (default, hadoop 2 only), mr (MapReduce)
 [ ! "$HIVE_ML_FRAMEWORK" ] && HIVE_ML_FRAMEWORK="mahout" # Available options are: spark, spark-2, mahout(default)
-[ ! "$BB_SERVER_DERBY" ] && BB_SERVER_DERBY="true" # Available options are: true(Server/Client deployment), false(embeded only)
+[ ! "$BB_SERVER_DERBY" ] && BB_SERVER_DERBY=1 # Defaults to 1 (Server/Client deployment). NOTE(review): options were documented as true/false - confirm which value now selects embedded-only mode
 
 if [ "$HIVE_ENGINE" == "mr" ]; then #For MapReduce DO NOT do MapJoins, MR uses lots of memory and tends to fail anyways because of high Garbage Collection times.
   HIVE_JOINS="false"