diff --git a/aloja-bench/run_benchs.sh b/aloja-bench/run_benchs.sh index da30d7e7..e60ae53c 100755 --- a/aloja-bench/run_benchs.sh +++ b/aloja-bench/run_benchs.sh @@ -32,16 +32,14 @@ prepare_folder "$DISK" # Save globals at the beginning (for debugging purposes) save_env "$JOB_PATH/config.sh" -# Check if needed to download files and configs -install_files +# Prepare benchmark configuration +benchmark_suite_config # 3.) Run the benchmarks # At this point, if the user presses ctrl+c or the script is killed to clean up afterwards and copy the files if remote is defined update_traps "benchmark_suite_cleanup; rsync_extenal '$JOB_NAME';" "update_logger" -benchmark_suite_config - start_time=$(date '+%s') ######################################################## diff --git a/shell/common/benchmark_BigBench.sh b/shell/common/benchmark_BigBench.sh index 4c88532b..48525d8c 100644 --- a/shell/common/benchmark_BigBench.sh +++ b/shell/common/benchmark_BigBench.sh @@ -18,31 +18,22 @@ if [ ! $user_suplied_bench_list ]; then fi benchmark_suite_config() { - initialize_hadoop_vars - prepare_hadoop_config "$NET" "$DISK" "$BENCH_SUITE" - start_hadoop + add_engine "hadoop" + add_engine "hive" - if [ "$BB_SERVER_DERBY" == "true" ]; then - logger "WARNING: Using Derby DB in client/server mode" - USE_EXTERNAL_DATABASE="true" - initialize_derby_vars "BigBench_DB" - start_derby - else - logger "WARNING: Using Derby DB in embedded mode" - fi + #BigBench uses Spark under specific circumstances: spark-SQL as SQL engine or spark-MLlib as Machine learning framework + if [ "$ENGINE" == "spark_sql" ] || [ "$HIVE_ML_FRAMEWORK" == "spark" ] || [ "$HIVE_ML_FRAMEWORK" == "spark-csv" ] \ + || [ "$HIVE_ML_FRAMEWORK" == "spark-2" ]; then - initialize_hive_vars - prepare_hive_config "$HIVE_SETTINGS_FILE" "$HIVE_SETTINGS_FILE_PATH" + add_engine "spark" - if [ ! -z "$use_spark" ]; then - initialize_spark_vars - prepare_spark_config + if [ "$HIVE_ML_FRAMEWORK" == "spark-2" ]; then + logger "WARNING: Using spark 2 as SQL engine and Machine Learning framework" + SPARK_HIVE="spark_hive-2.1.1" + fi fi - if [ "$HIVE_ENGINE" == "tez" ]; then - initialize_tez_vars - prepare_tez_config -fi + benchmark_suite_start_config initialize_BigBench_vars prepare_BigBench } diff --git a/shell/common/common_BigBench.sh b/shell/common/common_BigBench.sh index 2bcdfc3b..a3e1b5b4 100644 --- a/shell/common/common_BigBench.sh +++ b/shell/common/common_BigBench.sh @@ -1,31 +1,3 @@ -# Start Spark if needed -if [ "$ENGINE" == "spark_sql" ] || [ "$HIVE_ML_FRAMEWORK" == "spark" ] || [ "$HIVE_ML_FRAMEWORK" == "spark-csv" ] || [ "$HIVE_ML_FRAMEWORK" == "spark-2" ]; then - - if [ "$HIVE_ML_FRAMEWORK" == "spark-2" ]; then - logger "WARNING: Using spark 2 as SQL engine and Machine Learning framework" - SPARK_HIVE="spark_hive-2.1.1" - fi - use_spark=true - source_file "$ALOJA_REPO_PATH/shell/common/common_spark.sh" - set_spark_requires -# HIVE_ENGINE="mr" -fi - -# Start Hive -source_file "$ALOJA_REPO_PATH/shell/common/common_hive.sh" -set_hive_requires - -# Start Tez if needed -if [ "$HIVE_ENGINE" == "tez" ]; then - source_file "$ALOJA_REPO_PATH/shell/common/common_tez.sh" - set_tez_requires -fi - -if [ "$BB_SERVER_DERBY" == "true" ]; then - source_file "$ALOJA_REPO_PATH/shell/common/common_derby.sh" - set_derby_requires -fi - BIG_BENCH_FOLDER="Big-Data-Benchmark-for-Big-Bench" if [ "$BENCH_SCALE_FACTOR" == 0 ] ; then #Should only happen when BENCH_SCALE_FACTOR is not set and BENCH_DATA_SIZE < 1GB diff --git a/shell/common/common_benchmarks.sh b/shell/common/common_benchmarks.sh index 33134053..d3fd5714 100644 --- a/shell/common/common_benchmarks.sh +++ b/shell/common/common_benchmarks.sh @@ -162,11 +162,39 @@ get_options() { } +add_engine(){ + bench_engines+=("$1") +} + +source_engines(){ + for engine in "${bench_engines[@]}"; do + source_file "$ALOJA_REPO_PATH/shell/common/common_$engine.sh" + function_call "set_${engine}_requires" + done +} + +init_engines(){ + for engine in "${bench_engines[@]}"; do + function_call "init_$engine" + done +} # Temple functions, re implement in benchmark if needed -benchmark_suite_config() { - logger "DEBUG: No specific ${FUNCNAME[0]} defined for $BENCH_SUITE" +benchmark_suite_start_config() { + logger "INFO: preparing confiuration for $BENCH_SUITE" + + # Source needed engines and mark files to download + source_engines + + # Check if needed to download files and configs + install_files + + # Configure the engine and start services if needed + init_engines + + # Specify which binaries to use for monitoring + set_monit_binaries } # Iterate the specified benchmarks in the suite @@ -1281,9 +1309,6 @@ $($DSH "ls -lah '$HDD/../'; ls -lah '$HDD_TMP/../' " ) else logger "DEBUG: Base dirs created successfully" fi - - # specify which binaries to use for monitoring - set_monit_binaries } # Cleanup after a benchmark suite run, and before starting one diff --git a/shell/common/common_derby.sh b/shell/common/common_derby.sh index 5b450d46..c40edcd1 100644 --- a/shell/common/common_derby.sh +++ b/shell/common/common_derby.sh @@ -1,6 +1,5 @@ source_file "$ALOJA_REPO_PATH/shell/common/common_java.sh" set_java_requires - # Sets the required files to download/copy set_derby_requires() { [ ! "$DERBY_VERSION" ] && die "No DERBY_VERSION specified" @@ -117,6 +116,13 @@ initialize_derby_vars() { fi } +init_derby() { + initialize_derby_vars "Aloja_DB" + logger "WARNING: Using Derby DB in client/server mode" + USE_EXTERNAL_DATABASE="true" + start_derby +} + clean_derby() { stop_derby } \ No newline at end of file diff --git a/shell/common/common_hadoop.sh b/shell/common/common_hadoop.sh index df65536e..ce50d2e0 100644 --- a/shell/common/common_hadoop.sh +++ b/shell/common/common_hadoop.sh @@ -411,6 +411,14 @@ cp $HADOOP_CONF_DIR/* $JOB_PATH/conf_$node/" & fi } + +init_hadoop() { + initialize_hadoop_vars + prepare_hadoop_config "$NET" "$DISK" "$BENCH_SUITE" + use_hadoop=1 # Control variable, useful only in certain benchmarks + start_hadoop +} + # Returns if Hadoop v1 or v2 # $1 the hadoop string (optional, if not uses $HADOOP_VERSION) get_hadoop_major_version() { diff --git a/shell/common/common_hive.sh b/shell/common/common_hive.sh index 016d216f..5063e03b 100644 --- a/shell/common/common_hive.sh +++ b/shell/common/common_hive.sh @@ -1,4 +1,3 @@ -#HIVE SPECIFIC FUNCTIONS source_file "$ALOJA_REPO_PATH/shell/common/common_hadoop.sh" set_hadoop_requires @@ -117,6 +116,12 @@ initialize_hive_vars() { fi } +init_hive() { + initialize_hive_vars + prepare_hive_config + use_hive=1 # Control variable, useful only in certain benchmarks +} + get_hive_major_version() { local hive_string="$HIVE_VERSION" local major_version="" diff --git a/shell/common/common_java.sh b/shell/common/common_java.sh index 4a3c4c25..015be0f6 100644 --- a/shell/common/common_java.sh +++ b/shell/common/common_java.sh @@ -21,6 +21,10 @@ get_java_exports() { fi } +init_java(){ + set_java_requires +} + # Sets the JAVA_HOME for the benchmark # TODO this assumes you are in the head node and only sets it there, should finish export_var_path funct # also that it is run from the main run_bench.sh file diff --git a/shell/common/common_spark.sh b/shell/common/common_spark.sh index 67da0030..4b547c1c 100644 --- a/shell/common/common_spark.sh +++ b/shell/common/common_spark.sh @@ -1,7 +1,3 @@ -#SPARK SPECIFIC FUNCTIONS -source_file "$ALOJA_REPO_PATH/shell/common/common_hadoop.sh" -set_hadoop_requires - # Sets the required files to download/copy set_spark_requires() { [ ! "$SPARK_VERSION" ] && die "No SPARK_VERSION specified" @@ -103,6 +99,12 @@ execute_spark-sql(){ execute_spark "$bench" "$cmd" "$time_exec" "spark-sql" } +init_spark() { + initialize_spark_vars + prepare_spark_config + use_spark=1 # Control variable, useful only in certain benchmarks +} + initialize_spark_vars() { if [ "$clusterType" == "PaaS" ]; then SPARK_HOME="/usr" ## TODO ONLY WORKING IN HDI diff --git a/shell/common/common_tez.sh b/shell/common/common_tez.sh index 12646383..8b4c363d 100644 --- a/shell/common/common_tez.sh +++ b/shell/common/common_tez.sh @@ -38,6 +38,12 @@ initialize_tez_vars() { fi } +init_tez() { + set_tez_requires + initialize_tez_vars + prepare_tez_config +} + # Sets the substitution values for the tez config get_tez_substitutions() { diff --git a/shell/conf/benchmarks_defaults.conf b/shell/conf/benchmarks_defaults.conf index a9d4e0f9..aa1b0c91 100644 --- a/shell/conf/benchmarks_defaults.conf +++ b/shell/conf/benchmarks_defaults.conf @@ -85,7 +85,7 @@ BENCH_SCALE_FACTOR="$(( BENCH_DATA_SIZE / 1000000000 ))" #in GB [ ! "$HIVE_FILEFORMAT" ] && HIVE_FILEFORMAT="ORC" # Available options are: EXTFILE, RCFILE, ORC(default), SEQUENCEFILE, PARQUET, AVRO, [ ! "$HIVE_ENGINE" ] && HIVE_ENGINE="tez" # Available options are: tez (default, hadoop 2 only), mr (MapReduce) [ ! "$HIVE_ML_FRAMEWORK" ] && HIVE_ML_FRAMEWORK="mahout" # Available options are: spark, spark-2, mahout(default) -[ ! "$BB_SERVER_DERBY" ] && BB_SERVER_DERBY="true" # Available options are: true(Server/Client deployment), false(embeded only) +[ ! "$BB_SERVER_DERBY" ] && BB_SERVER_DERBY=1 # Available options are: true(Server/Client deployment), false(embeded only) if [ "$HIVE_ENGINE" == "mr" ]; then #For MapReduce DO NOT do MapJoins, MR uses lots of memory and tends to fail anyways because of high Garbage Collection times. HIVE_JOINS="false"