diff --git a/.gitignore b/.gitignore index 8128f6edc..b1109c0fa 100644 --- a/.gitignore +++ b/.gitignore @@ -42,6 +42,7 @@ services/libicecc.la suse/icecream.spec doc/*.1 doc/*.7 +doc/*.gz doc/index.html tests/results tests/test-suite.log @@ -50,3 +51,7 @@ tests/testargs.log tests/testargs.trs tests/test-setup.sh tests/listing.txt +unittests/testargs +unittests/testargs.log +unittests/testargs.trs +unittests/test-suite.log diff --git a/client/Makefile.am b/client/Makefile.am index 463b2403f..3a04d4783 100644 --- a/client/Makefile.am +++ b/client/Makefile.am @@ -27,6 +27,8 @@ AM_CPPFLAGS = \ -DPLIBDIR=\"$(pkglibexecdir)\" \ -I$(top_srcdir)/services \ -I$(top_srcdir)/ +AM_CXXFLAGS = \ + -std="c++11" EXTRA_DIST = icecc-create-env diff --git a/client/arg.cpp b/client/arg.cpp index 88b708f35..c1a494cd3 100644 --- a/client/arg.cpp +++ b/client/arg.cpp @@ -218,7 +218,8 @@ static bool is_argument_with_space(const char* argument) "--include-with-prefix-after", "-iwithprefixbefore", "--include-with-prefix-before", - "-iwithsysroot" + "-iwithsysroot", + "-imsvc" }; for( size_t i = 0; i < sizeof( arguments ) / sizeof( arguments[ 0 ] ); ++i ) { @@ -230,6 +231,163 @@ static bool is_argument_with_space(const char* argument) return false; } +static bool isClangClArgument( const char* argument ) +{ + // List of arguments was taken from a "clang-cl /?" call + + static const char* const arguments[] = + { + "/arch:", + "/bigobj", // not in the help output, but occurs + "/Brepro", + "/C", + "/c", + "/D", + "/d1PP", + "/d1reportAllClassLayout", + "/diagnostics:", + "/EH", + "/EP", + "/execution-charset:", + "/E", + "/fallback", + "/FA", + "/Fa", + "/Fd", // not in the help output, but occurs + "/Fe", + "/Fi", + "/Fo", + "/fp:", + "/Fp", + "/GA", + "/Gd", + "/GF-", + "/GR-", + "/Gregcall", + "/GR", + "/Gr", + "/GS", + "/Gs", + "/guard:", + "/Gv", + "/Gw", + "/GX", + "/Gy", + "/Gz", + "/J", + "/LD", + "/MD", + "/MT", + "/nologo", // not in the help output, but occurs + "/O", + "/P", + "/Qvec", + "/RTC1", + "/RTCc", + "/RTCs", + "/RTCu", + "/showIncludes", + "/source-charset:", + "/std:", + "/TC", + "/TP", + "/utf-8", + "/vd", + "/vm", + "/volatile:", + "/W", + "/w", + "/X", + "/Y", + "/Z7", + "/Zc:", + "/Zd", + "/Zi", + "/Zl", + "/Zp", + "/Zs", + "--analyze", + "-faddrsig", + "-fansi-escape-codes", + "-fblocks", + "-fcf-protection", + "-fcolor-diagnostics", + "-fcomplete-member-pointers", + "-fcoverage-mapping", + "-fdebug-macro", + "-fdelayed-template-parsing", + "-fdiagnostics-absolute-paths", + "-fdiagnostics-parseable-fixits", + "-flto", + "-fmerge-all-constants", + "-fms-compatibility", + "-fms-extensions", + "-fmsc-version=", + "-fno-addrsig", + "-fno-builtin", + "-fno-complete-member-pointers", + "-fno-coverage-mapping", + "-fno-debug-macro", + "-fno-delayed-template-parsing", + "-fno-sanitize-", + "-fno-standalone-debug", + "-fobjc-runtime=", + "-fprofile-instr-", + "-fsanitize", + "-fstandalone-debug", + "-fwhole-program-vtables", + "-gcodeview", + "-gline-tables-only", + "-miamcu", + "-nobuiltininc", + "-Qunused-arguments", + "-R", + "-std:", // not in the help output, but occurs + "--target=", + "-v", + "-W" + }; + + for ( size_t i = 0; i < sizeof( arguments ) / sizeof( arguments[ 0 ] ); ++i ) + { + if ( str_startswith( arguments[ i ], argument) ) + { + return true; + } + } + + return false; +} + +static bool isClangClArgumentWithSpace( const char* argument ) +{ + // List of arguments was taken from a "clang-cl /?" call + + static const char* const arguments[] = + { + "/FI", + "/imsvc", + "/I", + "/link", + "/o", + "/Tc", + "/Tp", + "/U", + "-mllvm", + "-Xclang" + }; + + for ( size_t i = 0; i < sizeof( arguments ) / sizeof( arguments[ 0 ] ); ++i ) + { + if ( str_startswith( arguments[ i ], argument) ) + { + return true; + } + } + + return false; +} + static bool analyze_assembler_arg(string &arg, list *extrafiles) { const char *pos = arg.c_str(); @@ -369,9 +527,12 @@ bool analyse_argv(const char * const *argv, CompileJob &job, bool icerun, listfirst == "-Xclang" || it->first == "-x" || is_argument_with_space(it->first.c_str())) { ++it; ++it; + } else if (compiler_is_clang_cl(job) && isClangClArgument(it->first.c_str())) { + ++it; } else if (it->second != Arg_Rest || it->first.at(0) == '-' || it->first.at(0) == '@') { ++it; @@ -789,6 +1008,8 @@ bool analyse_argv(const char * const *argv, CompileJob &job, bool icerun, list -1) { + if ((-1 == close(fdWriteStderr)) && (errno != EBADF)){ log_perror("close() failed"); } } @@ -90,8 +96,14 @@ pid_t call_cpp(CompileJob &job, int fdwrite, int fdread) } /* Child. Close the read fd, in case we have one. */ - if (fdread > -1) { - if ((-1 == close(fdread)) && (errno != EBADF)){ + if (fdReadStdout > -1) { + if ((-1 == close(fdReadStdout)) && (errno != EBADF)){ + log_perror("close failed"); + } + } + + if (fdReadStderr > -1) { + if ((-1 == close(fdReadStderr)) && (errno != EBADF)){ log_perror("close failed"); } } @@ -106,7 +118,7 @@ pid_t call_cpp(CompileJob &job, int fdwrite, int fdread) if (dcc_is_preprocessed(job.inputFile())) { /* already preprocessed, great. - write the file to the fdwrite (using cat) */ + write the file to the fdWriteStdout (using cat) */ argv = new char*[2 + 1]; argv[0] = strdup("/bin/cat"); argv[1] = strdup(job.inputFile().c_str()); @@ -164,8 +176,10 @@ pid_t call_cpp(CompileJob &job, int fdwrite, int fdread) int argc = flags.size(); argc++; // the program - argc += 2; // -E file.i + argc += 1; // -E argc += 1; // -frewrite-includes / -fdirectives-only + argc += 3; // clang-cl + argc += 1; // inputFile argv = new char*[argc + 1]; argv[0] = strdup(find_compiler(job).c_str()); int i = 1; @@ -175,7 +189,6 @@ pid_t call_cpp(CompileJob &job, int fdwrite, int fdread) } argv[i++] = strdup("-E"); - argv[i++] = strdup(job.inputFile().c_str()); if (compiler_only_rewrite_includes(job)) { if( compiler_is_clang(job)) { @@ -185,6 +198,15 @@ pid_t call_cpp(CompileJob &job, int fdwrite, int fdread) } } + if ( compiler_is_clang_cl( job ) ) + { + argv[ i++ ] = strdup( "-Xclang" ); + argv[ i++ ] = strdup( "-fcxx-exceptions" ); // necessary for boost::throw_exception + argv[ i++ ] = strdup( "--" ); // handle all following arguments as file + } + + argv[i++] = strdup(job.inputFile().c_str()); + argv[i++] = 0; } @@ -195,11 +217,16 @@ pid_t call_cpp(CompileJob &job, int fdwrite, int fdread) } trace() << "preparing source to send: " << argstxt << endl; - if (fdwrite != STDOUT_FILENO) { + if (fdWriteStdout != STDOUT_FILENO) { /* Ignore failure */ close(STDOUT_FILENO); - dup2(fdwrite, STDOUT_FILENO); - close(fdwrite); + dup2(fdWriteStdout, STDOUT_FILENO); + close(fdWriteStdout); + } + + if (fdWriteStderr > -1) + { + dup2( fdWriteStderr, STDERR_FILENO ); } dcc_increment_safeguard(SafeguardStepCompiler); diff --git a/client/icecc-create-env.in b/client/icecc-create-env.in index 4fea59c92..c9d355f68 100755 --- a/client/icecc-create-env.in +++ b/client/icecc-create-env.in @@ -83,8 +83,12 @@ convert_path_cdup () { local filename="$1" local directory=$(dirname $filename) - local fixed_directory=$(cd "$directory" >/dev/null && pwd) - echo ${fixed_directory}/$(basename $filename) + if [ -d "$directory" ]; then + local fixed_directory=$(cd "$directory" >/dev/null && pwd) + echo ${fixed_directory}/$(basename $filename) + else + echo $(realpath -m $filename) + fi } add_file () @@ -121,6 +125,17 @@ add_file () # Only call ldd when it makes sense if file -L "$path" | grep 'ELF' > /dev/null 2>&1; then if ! file -L "$path" | grep 'static' > /dev/null 2>&1; then + # Determine rpaths + local_dir=$( dirname $path ) + remote_dir=$( dirname $name ) + i=0 + while read line; do + RUNPATH[ $i ]="$line" + RUNPATH_LOCAL[ $i ]=$( realpath -s "${line/\$ORIGIN/$local_dir}" ) + RUNPATH_REMOTE[ $i ]=$( realpath -s "${line/\$ORIGIN/$remote_dir}" ) + i=$(( i + 1 )) + done < <( objdump -p "$path" | grep RUNPATH | awk '{print $2}' ) + # ldd now outputs ld as /lib/ld-linux.so.xx on current nptl based glibc # this regexp parse the outputs like: # ldd /usr/bin/gcc @@ -145,7 +160,23 @@ add_file () fi if test -n "$usebaselib"; then lib=$baselib - add_file "$lib" + + # Check rpaths + remote_libpath=$( realpath -s "$lib" ) + i=0 + while [[ i -lt ${#RUNPATH[@]} ]]; do + if [[ "$remote_libpath" =~ ^${RUNPATH_LOCAL[$i]} ]]; then + remote_libpath="${remote_libpath/${RUNPATH_LOCAL[$i]}/${RUNPATH_REMOTE[$i]}}" + break + fi + i=$(( i + 1 )) + done + + if [[ "$lib" != "$remote_libpath" ]]; then + add_file "$lib" "$remote_libpath" + else + add_file "$lib" + fi else # Optimization: We are adding a library we got from ldd output, so avoid # using ldd on it, as it should not find more than this ldd. @@ -191,7 +222,7 @@ add_file () # and prefer that on the assumption that it is a more generic one. local baselib=$(echo "$lib" | sed 's,\(/[^/]*\)/.*\(/[^/]*\)$,\1\2,') test -f "$baselib" && lib=$baselib - add_file "$lib" "$libinstall" + add_file "$lib" "$libinstall" done fi fi @@ -205,7 +236,11 @@ search_addfile() local file_installdir=$3 local file="" - file=$($compiler -print-prog-name=$file_name) + if echo "$compiler" | grep "clang-cl"; then + file=$file_name + else + file=$($compiler -print-prog-name=$file_name) + fi if test -z "$file" || test "$file" = "$file_name" || ! test -e "$file"; then file=$($compiler -print-file-name=$file_name) @@ -295,10 +330,25 @@ else fi if echo "$test_output" | grep -q '^clang 1 gcc.*'; then clang=1 - # With clang, -print-prog-name gives the full path to the actual clang binary, - # allowing to bypass any possible wrapper script etc. Note we must pass - # just the binary name, not full path. - added_clang=$($1 -print-prog-name=$(basename $1)) + if echo "$1" | grep "clang-cl"; then + # clang-cl has no -print-prog-name parameter, so we have to + # get the location of the actual clang binary from clang-cl -v output, which prints + # (to stderr) clangs path as InstalledDir. + added_clang=$($1 -v 2>&1 | grep InstalledDir: | sed 's/^InstalledDir: //') + if test -z "$added_clang"; then + echo Failed to find clang-cl location. + exit 1 + fi + added_clang=${added_clang}/clang-cl + if ! test -x "$added_clang"; then + added_clang=$(command -v $added_clang) + fi + else + # With clang, -print-prog-name gives the full path to the actual clang binary, + # allowing to bypass any possible wrapper script etc. Note we must pass + # just the binary name, not full path. + added_clang=$($1 -print-prog-name=$(basename $1)) + fi added_compilerwrapper=@PKGLIBEXECDIR@/compilerwrapper elif echo "$test_output" | grep -q 'clang __clang__ gcc.*'; then gcc=1 @@ -479,8 +529,11 @@ if test -n "$clang"; then add_file $added_compilerwrapper /usr/bin/gcc add_file $added_compilerwrapper /usr/bin/g++ - search_addfile $orig_clang as /usr/bin - search_addfile $orig_clang objcopy /usr/bin + search_addfile $added_clang as /usr/bin + search_addfile $added_clang objcopy /usr/bin + search_addfile $added_clang clang /usr/bin + search_addfile $added_clang clang++ /usr/bin + search_addfile $added_clang clang-cl /usr/bin # HACK: Clang4.0 and later access /proc/cpuinfo and report an error when they fail # to find it, even if they use a fallback mechanism, making the error useless @@ -491,6 +544,33 @@ if test -n "$clang"; then touch $tempdir/fakeproc/proc/cpuinfo add_file $tempdir/fakeproc/proc/cpuinfo /proc/cpuinfo fi + + # clang always uses its internal .h files + if echo "$orig_clang" | grep "clang-cl"; then + tmp_clang=`echo ${orig_clang:0:-3}` + clangincludes=$($tmp_clang -print-file-name=include/limits.h) + else + search_addfile $added_clang clang /usr/bin + search_addfile $added_clang clang++ /usr/bin + + clangincludes=$($orig_clang -print-file-name=include/limits.h) + fi + if test -z "$clangincludes"; then + echo $orig_clang cannot find its includes + exit 1 + fi + clangincludes=$(dirname $(abs_path $clangincludes)) + for file in $(find $clangincludes -type f); do + add_file "$file" + done + + # clang also needs the c++ headers at /usr/include... + clangstl=$(dirname $(dirname $(abs_path $orig_clang)))/include/c++/v1 + for file in $(find $clangstl -type f); do + relative=$(realpath --relative-to=$clangstl $file) + add_file "$file" /usr/include/$relative + done + fi # Do not do any prefix stripping on extra files, they (e.g. clang plugins) are usually diff --git a/client/local.cpp b/client/local.cpp index eac086a66..b8102fccb 100644 --- a/client/local.cpp +++ b/client/local.cpp @@ -153,6 +153,16 @@ bool compiler_is_clang(const CompileJob &job) return job.compilerName().find("clang") != string::npos; } +bool compiler_is_clang_cl(const CompileJob &job) +{ + if ( !compiler_is_clang( job ) ) + { + return false; + } + + return job.compilerName().find( "clang-cl" ) != string::npos; +} + /* Clang works suboptimally when handling an already preprocessed source file, for example error messages quote (already preprocessed) parts of the source. @@ -166,6 +176,11 @@ works similarly to -frewrite-includes (although it's not exactly the same). */ bool compiler_only_rewrite_includes(const CompileJob &job) { + if ( compiler_is_clang_cl( job ) ) + { + return false; + } + if( job.blockRewriteIncludes()) { return false; } @@ -248,15 +263,20 @@ int build_local(CompileJob &job, MsgChannel *local_daemon, struct rusage *used) arguments.push_back(compiler_name); appendList(arguments, job.allFlags()); - if (!job.inputFile().empty()) { - arguments.push_back(job.inputFile()); - } - if (!job.outputFile().empty()) { arguments.push_back("-o"); arguments.push_back(job.outputFile()); } + if ( !job.inputFile().empty() ) + { + if ( compiler_is_clang_cl( job ) ) + { + arguments.push_back( "--" ); + } + arguments.push_back( job.inputFile() ); + } + vector argv; string argstxt; @@ -273,6 +293,7 @@ int build_local(CompileJob &job, MsgChannel *local_daemon, struct rusage *used) trace() << "invoking:" << argstxt << endl; if (!local_daemon) { + log_block b("dcc_lock_host local compiler"); if (!dcc_lock_host()) { log_error() << "can't lock for local job" << endl; return EXIT_DISTCC_FAILED; diff --git a/client/main.cpp b/client/main.cpp index c57cc5819..7a3786499 100644 --- a/client/main.cpp +++ b/client/main.cpp @@ -93,6 +93,7 @@ static void dcc_show_usage(void) " ICECC_CC set C compiler name (default gcc).\n" " ICECC_CXX set C++ compiler name (default g++).\n" " ICECC_REMOTE_CPP set to 1 or 0 to override remote preprocessing\n" + " ICECC_CLANG_REMOTE_CPP set to 1 or 0 to override remote preprocessing\n" " ICECC_IGNORE_UNVERIFIED if set, hosts where environment cannot be verified are not used.\n" " ICECC_EXTRAFILES additional files used in the compilation.\n" " ICECC_COLOR_DIAGNOSTICS set to 1 or 0 to override color diagnostics support.\n" @@ -100,6 +101,7 @@ static void dcc_show_usage(void) " ICECC_COMPRESSION if set, the libzstd compression level (1 to 19, default: 1)\n" " ICECC_ENV_COMPRESSION compression type for icecc environments [none|gzip|bzip2|zstd|xz]\n" " ICECC_SLOW_NETWORK set to 1 to send network data in smaller chunks\n" + " ICECC_TEST_REMOTEBUILD if set always try to build remote\n" ); } @@ -284,6 +286,8 @@ class ArgumentExpander int main(int argc, char **argv) { + std::list< std::string > errors; + // expand @responsefile contents to arguments in argv array ArgumentExpander expand(&argc, &argv); @@ -540,6 +544,11 @@ int main(int argc, char **argv) if (ret == 0) { local_daemon->send_msg(EndMsg()); } + else + { + // Probably simple an error in source code, so that the file wouldn't compile + errors.push_back( "Exit code of compiler was not 0" ); + } } catch (remote_error& error) { // log the 'local cpp invocation failed' message by default, so that it's more // obvious why the cpp output is there (possibly) twice @@ -548,6 +557,7 @@ int main(int argc, char **argv) else log_info() << "local build forced by remote exception: " << error.what() << endl; local = true; + errors.push_back( error.what() ); } catch (client_error& error) { if (remote_daemon.size()) { @@ -558,6 +568,7 @@ int main(int argc, char **argv) endl; } + errors.push_back( std::string( error.what() ) + " (" + remote_daemon.c_str() + ")" ); #if 0 /* currently debugging a client? throw an error then */ if (debug_level > Error) { @@ -567,6 +578,13 @@ int main(int argc, char **argv) local = true; } + + for ( const auto &error : errors ) + { + local_daemon->send_msg( JobErrorMsg( 0, false, error ) ); + } + errors.clear(); + if (local) { // TODO It'd be better to reuse the connection, but the daemon // internal state gets confused for some reason, so work that around @@ -586,7 +604,9 @@ int main(int argc, char **argv) Msg *startme = 0L; /* Inform the daemon that we like to start a job. */ - if (local_daemon->send_msg(JobLocalBeginMsg(0, get_absfilename(job.outputFile())))) { + if (local_daemon->send_msg(JobLocalBeginMsg(0, get_absfilename(job.inputFile()), + get_absfilename(job.outputFile()), job.language(), + job.compilerName()))) { /* Now wait until the daemon gives us the start signal. 40 minutes should be enough for all normal compile or link jobs. */ startme = local_daemon->get_msg(40 * 60); @@ -600,6 +620,11 @@ int main(int argc, char **argv) return build_local(job, 0); } + for ( const auto &error : errors ) + { + local_daemon->send_msg( JobErrorMsg( 0, true, error ) ); + } + ret = build_local(job, local_daemon, &ru); delete startme; } diff --git a/client/remote.cpp b/client/remote.cpp index 8fe817287..831fd4228 100644 --- a/client/remote.cpp +++ b/client/remote.cpp @@ -21,6 +21,8 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ +//#define DEBUG_CPP_FILE + #include "config.h" #include @@ -254,67 +256,193 @@ static void check_for_failure(Msg *msg, MsgChannel *cserver) } } -static void write_fd_to_server(int fd, MsgChannel *cserver) +static void write_source_chunk( unsigned char *buffer, off_t &offset, size_t &uncompressed, size_t &compressed, + MsgChannel *cserver, FILE *debugOut ) +{ + if ( offset ) + { + FileChunkMsg fcmsg( buffer, offset ); + if ( debugOut ) + { + fwrite( buffer, offset, 1, debugOut ); + } + + if ( !cserver->send_msg( fcmsg ) ) + { + Msg *m = cserver->get_msg( 2 ); + check_for_failure( m, cserver ); + + log_error() << "write of source chunk to host " << cserver->name.c_str() << endl; + log_perror( "failed " ); + std::string errmsg; + if ( errno > 0 ) + { + errmsg = " ("; + errmsg += strerror( errno ); + errmsg += ")"; + } + throw client_error(15, "Error 15 - write to host failed" + errmsg); + } + + uncompressed += fcmsg.len; + compressed += fcmsg.compressed; + offset = 0; + } +} + +static void write_fd_to_server(int stdout_fd, int stderr_fd, MsgChannel *cserver) { unsigned char buffer[100000]; // some random but huge number + std::string stderrBuffer; off_t offset = 0; size_t uncompressed = 0; size_t compressed = 0; + fd_set rfds; + struct timeval tv; + FILE *debugOut = nullptr; + +#ifdef DEBUG_CPP_FILE + char debugFilename[ 1000 ]; + struct timeval tp; + gettimeofday( &tp, nullptr ); + sprintf( debugFilename, "/tmp/icecc_debug_%ld.cpp", tp.tv_sec * 1000 + tp.tv_usec / 1000 ); + debugOut = fopen( debugFilename, "w" ); +#endif do { - ssize_t bytes; + bool again = false; - do { - bytes = read(fd, buffer + offset, sizeof(buffer) - offset); + FD_ZERO( &rfds ); - if (bytes < 0 && (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK)) { - continue; - } + FD_SET( stdout_fd, &rfds ); + int maxFds = stdout_fd; + + if ( stderr_fd > 0 ) + { + FD_SET( stderr_fd, &rfds ); + maxFds = std::max( maxFds, stderr_fd ); + } + + tv.tv_sec = 600; // if the other side is slow... + tv.tv_usec = 0; - if (bytes < 0) { - log_perror("write_fd_to_server() reading from fd"); - close(fd); - throw client_error(16, "Error 16 - error reading local file"); + const int retval = select( maxFds + 1, &rfds, nullptr, nullptr, &tv ); + + if ( retval < 0 ) + { + log_perror( "select" ); + } + else if ( retval ) + { + if ( FD_ISSET( stdout_fd, &rfds ) ) + { + const auto bytes = read( stdout_fd, buffer + offset, sizeof( buffer ) - offset ); + + if ( bytes < 0 ) + { + if ( errno == EINTR || errno == EAGAIN ) + { + again = true; } + else + { + log_perror( "reading from stdout_fd" ); + close( stdout_fd ); + throw client_error( 16, "Error 16 - error reading local cpp file" ); + } + } - break; - } while (1); + offset += bytes; - offset += bytes; + if ( !bytes || offset == sizeof( buffer ) ) + { + write_source_chunk( buffer, offset, uncompressed, compressed, cserver, debugOut ); + } - if (!bytes || offset == sizeof(buffer)) { - if (offset) { - FileChunkMsg fcmsg(buffer, offset); + if ( bytes ) + { + again = true; + } + } - if (!cserver->send_msg(fcmsg)) { - Msg *m = cserver->get_msg(2); - check_for_failure(m, cserver); + if ( stderr_fd > 0 && FD_ISSET( stderr_fd, &rfds ) ) + { + // Redirect all preprocessor output beginning with "Note: including file:" from STDERR toSTDOUT. + // This lines are used by Ninja to build the dependencies. - log_error() << "write of source chunk to host " - << cserver->name.c_str() << endl; - log_perror("failed "); - close(fd); - throw client_error(15, "Error 15 - write to host failed"); - } + char tmpBuffer[ 100000 ]; + const auto bytes = read( stderr_fd, tmpBuffer, sizeof( tmpBuffer ) - 1 ); - uncompressed += fcmsg.len; - compressed += fcmsg.compressed; - offset = 0; - } + if ( bytes > 0 ) + { + again = true; - if (!bytes) { - break; - } + tmpBuffer[ bytes ] = '\0'; + stderrBuffer.append( tmpBuffer ); + } + } + } + else + { + throw client_error( 103, "Error 103 - select timeout" ); + } + + if ( again ) + { + continue; + } + + break; + } + while ( true ); + + // Send all what is still in the Buffer + write_source_chunk( buffer, offset, uncompressed, compressed, cserver, debugOut ); + + if ( ( -1 == close( stdout_fd ) ) && ( errno != EBADF ) ) + { + log_perror( "close failed" ); + } + + if ( stderr_fd > 0 ) + { + close( stderr_fd ); + + std::string::size_type pos = 0; + std::string::size_type prevPos = 0; + const char delimiter = '\n'; + + if ( stderrBuffer.back() != delimiter ) + { + stderrBuffer.push_back( delimiter ); + } + + while ( ( pos = stderrBuffer.find( delimiter, pos ) ) != std::string::npos ) + { + std::string line = stderrBuffer.substr( prevPos, pos - prevPos + 1 ); + + if ( line.find( "Note: including file:" ) == 0 ) + { + fwrite( line.c_str(), line.length(), 1, stderr ); + } + else + { + fwrite( line.c_str(), line.length(), 1, stdout ); } - } while (1); - if (compressed) - trace() << "sent " << compressed << " bytes (" << (compressed * 100 / uncompressed) << - "%)" << endl; + prevPos = ++pos; + } + } - if ((-1 == close(fd)) && (errno != EBADF)){ - log_perror("close failed"); + if ( compressed ) + { + trace() << "sent " << compressed << " bytes (" << ( compressed * 100 / uncompressed ) << "%)" << endl; } + +#ifdef DEBUG_CPP_FILE + fclose( debugOut ); + trace() << "wrote debug file: " << debugFilename << " " << endl; +#endif } static void receive_file(const string& output_file, MsgChannel* cserver) @@ -421,6 +549,12 @@ static int build_remote_int(CompileJob &job, UseCSMsg *usecs, MsgChannel *local_ throw client_error(2, "Error 2 - no server found at " + hostname); } + if ( compiler_is_clang_cl( job ) && !IS_PROTOCOL_107( cserver ) ) + { + log_error() << "server " << hostname << " is too old for clang-cl" << endl; + throw client_error( 104, "Error 104 - server " + hostname + " too old for clang-cl" ); + } + if (!got_env) { log_block b("Transfer Environment"); // transfer env @@ -443,7 +577,7 @@ static int build_remote_int(CompileJob &job, UseCSMsg *usecs, MsgChannel *local_ throw client_error(5, "Error 5 - unable to open version file:\n\t" + version_file); } - write_fd_to_server(env_fd, cserver); + write_fd_to_server(env_fd, -1, cserver); if (!cserver->send_msg(EndMsg())) { log_error() << "write of environment failed" << endl; @@ -490,7 +624,7 @@ static int build_remote_int(CompileJob &job, UseCSMsg *usecs, MsgChannel *local_ // Older remotes don't set properly -x argument. if(( job.language() == CompileJob::Lang_OBJC || job.language() == CompileJob::Lang_OBJCXX ) - && !IS_PROTOCOL_38(cserver)) { + && !IS_PROTOCOL_38(cserver) && !compiler_is_clang_cl(job)) { job.appendFlag( "-x", Arg_Remote ); job.appendFlag( job.language() == CompileJob::Lang_OBJC ? "objective-c" : "objective-c++", Arg_Remote ); } @@ -507,6 +641,7 @@ static int build_remote_int(CompileJob &job, UseCSMsg *usecs, MsgChannel *local_ if (!preproc_file) { int sockets[2]; + int ppOut[2]; if (create_large_pipe(sockets) != 0) { log_perror("build_remote_in pipe"); @@ -514,16 +649,35 @@ static int build_remote_int(CompileJob &job, UseCSMsg *usecs, MsgChannel *local_ throw client_error(32, "Error 18 - (fork error?)"); } - if (!dcc_lock_host()) { - log_error() << "can't lock for local cpp" << endl; - return EXIT_DISTCC_FAILED; + if ( compiler_is_clang_cl( job ) ) + { + if ( pipe( ppOut ) != 0 ) + { + /* for all possible cases, this is something severe */ + throw client_error(32, "Error 18 - (fork error?)"); + } + } + else + { + ppOut[ 0 ] = -1; + ppOut[ 1 ] = -1; + } + +/* + { + log_block b("dcc_lock_host remote preprocessor"); + if (!dcc_lock_host()) { + log_error() << "can't lock for remote cpp" << endl; + return EXIT_DISTCC_FAILED; + } } HostUnlock hostUnlock; // automatic dcc_unlock() +*/ /* This will fork, and return the pid of the child. It will not return for the child itself. If it returns normally it will have closed the write fd, i.e. sockets[1]. */ - pid_t cpp_pid = call_cpp(job, sockets[1], sockets[0]); + pid_t cpp_pid = call_cpp(job, sockets[1], sockets[0], ppOut[1], ppOut[0]); if (cpp_pid == -1) { throw client_error(18, "Error 18 - (fork error?)"); @@ -531,7 +685,7 @@ static int build_remote_int(CompileJob &job, UseCSMsg *usecs, MsgChannel *local_ try { log_block bl2("write_fd_to_server from cpp"); - write_fd_to_server(sockets[0], cserver); + write_fd_to_server(sockets[0], ppOut[0], cserver); } catch (...) { kill(cpp_pid, SIGTERM); throw; @@ -563,7 +717,7 @@ static int build_remote_int(CompileJob &job, UseCSMsg *usecs, MsgChannel *local_ } log_block cpp_block("write_fd_to_server preprocessed"); - write_fd_to_server(cpp_fd, cserver); + write_fd_to_server(cpp_fd, -1, cserver); } if (!cserver->send_msg(EndMsg())) { @@ -574,7 +728,7 @@ static int build_remote_int(CompileJob &job, UseCSMsg *usecs, MsgChannel *local_ Msg *msg; { log_block wait_cs("wait for cs"); - msg = cserver->get_msg(12 * 60); + msg = cserver->get_msg(6 * 60); if (!msg) { throw client_error(14, "Error 14 - error reading message from remote"); @@ -837,7 +991,7 @@ int build_remote(CompileJob &job, MsgChannel *local_daemon, const Environments & fake_filename += get_absfilename(job.inputFile()); - GetCSMsg getcs(envs, fake_filename, job.language(), torepeat, + GetCSMsg getcs(envs, fake_filename, job.language(), job.compilerName(), torepeat, job.targetPlatform(), job.argumentFlags(), preferred_host ? preferred_host : string(), minimalRemoteVersion(job), requiredRemoteFeatures()); @@ -870,9 +1024,12 @@ int build_remote(CompileJob &job, MsgChannel *local_daemon, const Environments & const CharBufferDeleter preproc_holder(preproc); int cpp_fd = open(preproc, O_WRONLY); - if (!dcc_lock_host()) { - log_error() << "can't lock for local cpp" << endl; - return EXIT_DISTCC_FAILED; + { + log_block b("dcc_lock_host local preprocessor"); + if (!dcc_lock_host()) { + log_error() << "can't lock for local cpp" << endl; + return EXIT_DISTCC_FAILED; + } } HostUnlock hostUnlock; // automatic dcc_unlock() @@ -899,7 +1056,7 @@ int build_remote(CompileJob &job, MsgChannel *local_daemon, const Environments & sprintf(rand_seed, "-frandom-seed=%d", rand()); job.appendFlag(rand_seed, Arg_Remote); - GetCSMsg getcs(envs, get_absfilename(job.inputFile()), job.language(), torepeat, + GetCSMsg getcs(envs, get_absfilename(job.inputFile()), job.language(), job.compilerName(), torepeat, job.targetPlatform(), job.argumentFlags(), preferred_host ? preferred_host : string(), minimalRemoteVersion(job), 0); diff --git a/daemon/main.cpp b/daemon/main.cpp index b0dbd95ee..ef3e6f9dd 100644 --- a/daemon/main.cpp +++ b/daemon/main.cpp @@ -135,6 +135,7 @@ struct Client { job_id = 0; channel = 0; job = 0; + language = CompileJob::Lang_Custom, usecsmsg = 0; client_id = 0; status = UNKNOWN; @@ -199,7 +200,10 @@ struct Client { } uint32_t job_id; + string infile; string outfile; // only useful for LINKJOB or TOINSTALL/WAITINSTALL + CompileJob::Language language; + string compiler; MsgChannel *channel; UseCSMsg *usecsmsg; CompileJob *job; @@ -555,6 +559,7 @@ struct Daemon { bool handle_get_cs(Client *client, Msg *msg) __attribute_warn_unused_result__; bool handle_local_job(Client *client, Msg *msg) __attribute_warn_unused_result__; bool handle_job_done(Client *cl, JobDoneMsg *m) __attribute_warn_unused_result__; + bool handle_job_error(Client *client, JobErrorMsg *m) __attribute_warn_unused_result__; bool handle_compile_done(Client *client) __attribute_warn_unused_result__; bool handle_verify_env(Client *client, VerifyEnvMsg *msg) __attribute_warn_unused_result__; bool handle_blacklist_host_env(Client *client, Msg *msg) __attribute_warn_unused_result__; @@ -1491,6 +1496,28 @@ bool Daemon::handle_job_done(Client *cl, JobDoneMsg *m) return send_scheduler(*msg); } +bool Daemon::handle_job_error( Client *cl, JobErrorMsg *m ) +{ + if ( !IS_PROTOCOL_107( scheduler ) ) + { + return true; + } + + JobErrorMsg *msg = static_cast< JobErrorMsg * >( m ); + msg->job_id = cl->job_id; + + if ( cl->status == Client::WAITFORCS || msg->localBuild ) + { + // We don't know the job id, because we haven't received a reply from the scheduler yet or this is a local + // build. Use client_id to identify the job, the scheduler will use it for matching. + msg->client_id = cl->client_id; + } + + trace() << "handle_job_error " << msg->job_id << "/" << msg->client_id << " " << msg->error << endl; + + return send_scheduler( *msg ); +} + void Daemon::handle_old_request() { while ((current_kids + clients.active_processes) < std::max((unsigned int)1, max_kids)) { @@ -1508,7 +1535,8 @@ void Daemon::handle_old_request() clients.active_processes++; trace() << "pushed local job " << client->client_id << endl; - if (!send_scheduler(JobLocalBeginMsg(client->client_id, client->outfile))) { + if (!send_scheduler(JobLocalBeginMsg(client->client_id, client->infile, client->outfile, + client->language, client->compiler))) { return; } } @@ -1817,8 +1845,12 @@ int Daemon::handle_cs_conf(ConfCSMsg *msg) bool Daemon::handle_local_job(Client *client, Msg *msg) { + const JobLocalBeginMsg *beginMsg = dynamic_cast( msg ); client->status = Client::LINKJOB; - client->outfile = dynamic_cast(msg)->outfile; + client->infile = beginMsg->inFile; + client->outfile = beginMsg->outFile; + client->language = beginMsg->language; + client->compiler = beginMsg->compiler; return true; } @@ -1864,6 +1896,9 @@ bool Daemon::handle_activity(Client *client) case M_JOB_DONE: ret = handle_job_done(client, dynamic_cast(msg)); break; + case M_JOB_ERROR: + ret = handle_job_error(client, dynamic_cast(msg)); + break; case M_VERIFY_ENV: ret = handle_verify_env(client, dynamic_cast(msg)); break; diff --git a/daemon/workit.cpp b/daemon/workit.cpp index 549f6b512..1149a5b5c 100644 --- a/daemon/workit.cpp +++ b/daemon/workit.cpp @@ -87,6 +87,7 @@ error_client(MsgChannel *client, string error) { if (IS_PROTOCOL_23(client)) { client->send_msg(StatusTextMsg(error)); + client->shutdown_socket(); } } @@ -221,10 +222,12 @@ int work_it(CompileJob &j, unsigned int job_stat[], MsgChannel *client, CompileR char **argv = new char*[argc + 1]; int i = 0; bool clang = false; + bool clangCl = false; if (IS_PROTOCOL_30(client)) { assert(!j.compilerName().empty()); clang = (j.compilerName().find("clang") != string::npos); + clangCl = ( j.compilerName().find( "clang-cl" ) != string::npos ); argv[i++] = strdup(("/usr/bin/" + j.compilerName()).c_str()); } else { if (j.language() == CompileJob::Lang_C) { @@ -236,18 +239,21 @@ int work_it(CompileJob &j, unsigned int job_stat[], MsgChannel *client, CompileR } } - argv[i++] = strdup("-x"); - if (j.language() == CompileJob::Lang_C) { - argv[i++] = strdup("c"); - } else if (j.language() == CompileJob::Lang_CXX) { - argv[i++] = strdup("c++"); - } else if (j.language() == CompileJob::Lang_OBJC) { - argv[i++] = strdup("objective-c"); - } else if (j.language() == CompileJob::Lang_OBJCXX) { - argv[i++] = strdup("objective-c++"); - } else { - error_client(client, "language not supported"); - log_perror("language not supported"); + if ( !clangCl ) + { + argv[i++] = strdup("-x"); + if (j.language() == CompileJob::Lang_C) { + argv[i++] = strdup("c"); + } else if (j.language() == CompileJob::Lang_CXX) { + argv[i++] = strdup("c++"); + } else if (j.language() == CompileJob::Lang_OBJC) { + argv[i++] = strdup("objective-c"); + } else if (j.language() == CompileJob::Lang_OBJCXX) { + argv[i++] = strdup("objective-c++"); + } else { + error_client(client, "language not supported"); + log_perror("language not supported"); + } } if( clang ) { @@ -308,7 +314,7 @@ int work_it(CompileJob &j, unsigned int job_stat[], MsgChannel *client, CompileR assert(i <= argc); argstxt.clear(); - for (int pos = 1; + for (int pos = 0; pos < i; ++pos ) { argstxt += ' '; @@ -665,6 +671,7 @@ int work_it(CompileJob &j, unsigned int job_stat[], MsgChannel *client, CompileR || (rmsg.err.find("failed to map segment from shared object") != string::npos) || (rmsg.err.find("Assertion `NewElts && \"Out of memory\"' failed") != string::npos) || (rmsg.err.find("terminate called after throwing an instance of 'std::bad_alloc'") != string::npos) + || (rmsg.err.find("terminating with uncaught exception of type std::bad_alloc") != string::npos) || (rmsg.err.find("llvm::MallocSlabAllocator::Allocate") != string::npos)) { // the relation between ulimit and memory used is pretty thin ;( log_warning() << "Remote compilation failed, presumably because of running out of memory (exit code " @@ -705,6 +712,7 @@ int work_it(CompileJob &j, unsigned int job_stat[], MsgChannel *client, CompileR } } else { log_warning() << "Remote compilation aborted with exit code " << shell_exit_status(status) << endl; + return_value = EXIT_COMPILER_CRASHED; } return return_value; diff --git a/scheduler/compileserver.cpp b/scheduler/compileserver.cpp index f932fbc63..e38a9078f 100644 --- a/scheduler/compileserver.cpp +++ b/scheduler/compileserver.cpp @@ -45,6 +45,7 @@ CompileServer::CompileServer(const int fd, struct sockaddr *_addr, const socklen , m_nodeName() , m_busyInstalling(0) , m_hostPlatform() + , m_startTime(time(0)) , m_load(1000) , m_maxJobs(0) , m_noRemote(false) @@ -263,6 +264,21 @@ void CompileServer::setHostPlatform(const string &platform) m_hostPlatform = platform; } +unsigned int CompileServer::protocolVersion() const +{ + return m_protocolVersion; +} + +void CompileServer::setProtocolVersion( unsigned int version ) +{ + m_protocolVersion = version; +} + +time_t CompileServer::startTime() const +{ + return m_startTime; +} + unsigned int CompileServer::load() const { return m_load; diff --git a/scheduler/compileserver.h b/scheduler/compileserver.h index 69205d527..166452dc7 100644 --- a/scheduler/compileserver.h +++ b/scheduler/compileserver.h @@ -78,6 +78,11 @@ class CompileServer : public MsgChannel string hostPlatform() const; void setHostPlatform(const string &platform); + unsigned int protocolVersion() const; + void setProtocolVersion(unsigned int); + + time_t startTime() const; + unsigned int load() const; void setLoad(const unsigned int load); @@ -158,6 +163,8 @@ class CompileServer : public MsgChannel string m_nodeName; time_t m_busyInstalling; string m_hostPlatform; + unsigned int m_protocolVersion; + time_t m_startTime; // LOAD is load * 1000 unsigned int m_load; diff --git a/scheduler/scheduler.cpp b/scheduler/scheduler.cpp index 866d841fb..dab36cbca 100644 --- a/scheduler/scheduler.cpp +++ b/scheduler/scheduler.cpp @@ -97,7 +97,8 @@ static string pidFilePath; static map fd2cs; static volatile sig_atomic_t exit_main_loop = false; -time_t starttime; +time_t starttimeReal; +time_t starttimeBroadcast; time_t last_announce; static string scheduler_interface = ""; static unsigned int scheduler_port = 8765; @@ -211,7 +212,7 @@ static void add_job_stats(Job *job, JobDoneMsg *msg) #if DEBUG_SCHEDULER > 1 if (job->argFlags() < 7000) { trace() << "add_job_stats " << job->language() << " " - << (time(0) - starttime) << " " + << (time(0) - starttimeReal) << " " << st.compileTimeUser() << " " << (job->argFlags() & CompileJob::Flag_g ? '1' : '0') << (job->argFlags() & CompileJob::Flag_g3 ? '1' : '0') @@ -230,19 +231,30 @@ static bool handle_end(CompileServer *cs, Msg *); static void notify_monitors(Msg *m) { + list toRemove; list::iterator it; - list::iterator it_old; + const bool isSchedulerInfo = dynamic_cast< MonSchedulerInfoMsg *>( m ); - for (it = monitors.begin(); it != monitors.end();) { - it_old = it++; + for (it = monitors.begin(); it != monitors.end();++it) { + CompileServer *csIt = ( *it ); + + if ( isSchedulerInfo && !IS_PROTOCOL_108( csIt ) ) + { + continue; + } /* If we can't send it, don't be clever, simply close this monitor. */ - if (!(*it_old)->send_msg(*m, MsgChannel::SendNonBlocking /*| MsgChannel::SendBulkOnly*/)) { + if (!csIt->send_msg(*m, MsgChannel::SendNonBlocking /*| MsgChannel::SendBulkOnly*/)) { trace() << "monitor is blocking... removing" << endl; - handle_end(*it_old, 0); + toRemove.push_back( csIt ); } } + for ( it = toRemove.begin(); it != toRemove.end(); ++it ) + { + handle_end( *it, 0 ); + } + delete m; } @@ -346,6 +358,10 @@ static void handle_monitor_stats(CompileServer *cs, StatsMsg *m = 0) msg += buffer; sprintf(buffer, "Speed:%f\n", server_speed(cs)); msg += buffer; + sprintf(buffer, "ProtocolVersion:%d\n", cs->protocolVersion()); + msg += buffer; + sprintf(buffer, "StartTime:%ld\n", cs->startTime()); + msg += buffer; if (m) { sprintf(buffer, "Load:%d\n", m->load); @@ -501,9 +517,9 @@ static bool handle_local_job(CompileServer *cs, Msg *_m) } ++new_job_id; - trace() << "handle_local_job " << m->outfile << " " << m->id << endl; + trace() << "handle_local_job " << m->inFile << " " << m->id << endl; cs->insertClientJobId(m->id, new_job_id); - notify_monitors(new MonLocalJobBeginMsg(new_job_id, m->outfile, m->stime, cs->hostId())); + notify_monitors(new MonLocalJobBeginMsg(new_job_id, m->inFile, m->language, m->compiler, m->stime, cs->hostId())); return true; } @@ -1027,6 +1043,7 @@ static bool handle_login(CompileServer *cs, Msg *_m) } cs->setHostPlatform(m->host_platform); + cs->setProtocolVersion((unsigned int)m->protocol_version); cs->setChrootPossible(m->chroot_possible); cs->setSupportedFeatures(m->supported_features); cs->pick_new_id(); @@ -1036,6 +1053,14 @@ static bool handle_login(CompileServer *cs, Msg *_m) return false; } + if ( !IS_PROTOCOL_107( cs ) ) + { + log_warning() << "login denied daemon " << m->nodename << ", protocol version " << cs->protocol << " too old" + << endl; + return false; + } + + log_warning() << "login daemon " << m->nodename << endl; dbg << "login " << m->nodename << " protocol version: " << cs->protocol << " features: " << supported_features_to_string(m->supported_features) << " ["; @@ -1081,6 +1106,7 @@ static bool handle_relogin(MsgChannel *mc, Msg *_m) cs->setBusyInstalling(0); std::ostream &dbg = trace(); + log_warning() << "relogin daemon " << m->nodename << endl; dbg << "RELOGIN " << cs->nodeName() << "(" << cs->hostPlatform() << "): ["; for (Environments::const_iterator it = m->envs.begin(); it != m->envs.end(); ++it) { @@ -1108,11 +1134,16 @@ static bool handle_mon_login(CompileServer *cs, Msg *_m) monitors.push_back(cs); // monitors really want to be fed lazily cs->setBulkTransfer(); + cs->setNodeName(cs->name); + + log_warning() << "login monitor " << cs->nodeName() << endl; for (list::const_iterator it = css.begin(); it != css.end(); ++it) { handle_monitor_stats(*it); } + notify_monitors( new MonSchedulerInfoMsg( starttimeReal, monitors.size() ) ); + fd2cs.erase(cs->fd); // no expected data from them return true; } @@ -1274,6 +1305,28 @@ static bool handle_job_done(CompileServer *cs, Msg *_m) return true; } +static bool handle_job_error( CompileServer *cs, Msg *_m ) +{ + JobErrorMsg *m = dynamic_cast< JobErrorMsg * >( _m ); + + if ( !m ) + { + return false; + } + + auto monMsg = new JobErrorMsg(); + *monMsg = *m; + if ( uint32_t clientId = m->client_id ) + { + // Der Client weiss die Job-ID nicht. Deshalb über die Client-ID den Job ermitteln. + monMsg->job_id = cs->getClientJobId( clientId ); + monMsg->client_id = 0; + } + notify_monitors( monMsg ); + + return true; +} + static bool handle_ping(CompileServer *cs, Msg * /*_m*/) { cs->last_talk = time(0); @@ -1398,7 +1451,7 @@ static bool handle_control_login(CompileServer *cs) std::ostringstream o; o << "200-ICECC " VERSION ": " - << time(0) - starttime << "s uptime, " + << time(0) - starttimeReal << "s uptime, " << css.size() << " hosts, " << jobs.size() << " jobs in queue " << "(" << new_job_id << " total)." << endl; @@ -1593,16 +1646,25 @@ static bool handle_end(CompileServer *toremove, Msg *m) (void)m; #endif + bool found = true; + switch (toremove->type()) { case CompileServer::MONITOR: - assert(find(monitors.begin(), monitors.end(), toremove) != monitors.end()); + log_warning() << "logout monitor " << toremove->nodeName() << endl; + found = ( find(monitors.begin(), monitors.end(), toremove) != monitors.end() ); + assert( found ); + if ( !found ) + { + log_error() << "monitor not found " << toremove->nodeName() << endl; + } monitors.remove(toremove); #if DEBUG_SCHEDULER > 1 trace() << "handle_end(moni) " << monitors.size() << endl; #endif + notify_monitors( new MonSchedulerInfoMsg( starttimeReal, monitors.size() ) ); break; case CompileServer::DAEMON: - log_info() << "remove daemon " << toremove->nodeName() << endl; + log_warning() << "logout daemon " << toremove->nodeName() << endl; notify_monitors(new MonStatsMsg(toremove->hostId(), "State:Offline\n")); @@ -1670,17 +1732,22 @@ static bool handle_end(CompileServer *toremove, Msg *m) break; case CompileServer::LINE: + log_warning() << "logout line" << endl; toremove->send_msg(TextMsg("200 Good Bye!")); controls.remove(toremove); break; default: + log_error() << "logout UNKNOWN" << endl; trace() << "remote end had UNKNOWN type?" << endl; break; } fd2cs.erase(toremove->fd); - delete toremove; + if ( found ) + { + delete toremove; + } return true; } @@ -1708,6 +1775,9 @@ static bool handle_activity(CompileServer *cs) case M_JOB_DONE: ret = handle_job_done(cs, m); break; + case M_JOB_ERROR: + ret = handle_job_error( cs, m ); + break; case M_PING: ret = handle_ping(cs, m); break; @@ -1871,13 +1941,13 @@ static void handle_scheduler_announce(const char* buf, const char* netname, bool { if (other_netname == netname) { - if (other_protocol_version > PROTOCOL_VERSION || (other_protocol_version == PROTOCOL_VERSION && other_time < starttime)) + if (other_protocol_version > PROTOCOL_VERSION || (other_protocol_version == PROTOCOL_VERSION && other_time < starttimeBroadcast)) { if (!persistent_clients){ log_info() << "Scheduler from " << inet_ntoa(broad_addr.sin_addr) << ":" << ntohs(broad_addr.sin_port) - << " (version " << int(other_protocol_version) << ") has announced itself as a preferred" - " scheduler, disconnecting all connections." << endl; + << " (version:" << int(other_protocol_version) << " start:" << starttimeBroadcast + << ") has announced itself as a preferred scheduler, disconnecting all connections." << endl; if (!css.empty() || !monitors.empty()) { while (!css.empty()) @@ -2104,9 +2174,10 @@ int main(int argc, char *argv[]) return 1; } - starttime = time(0); + starttimeReal = time(0); + starttimeBroadcast = starttimeReal; if( getenv( "ICECC_FAKE_STARTTIME" ) != NULL ) - starttime -= 1000; + starttimeBroadcast = 946684800; // 01.01.2000 ofstream pidFile; string progName = argv[0]; @@ -2124,8 +2195,8 @@ int main(int argc, char *argv[]) time_t next_listen = 0; - Broadcasts::broadcastSchedulerVersion(scheduler_port, netname, starttime); - last_announce = starttime; +// Broadcasts::broadcastSchedulerVersion(scheduler_port, netname, starttimeBroadcast); + last_announce = starttimeBroadcast; while (!exit_main_loop) { int timeout = prune_servers(); @@ -2137,8 +2208,13 @@ int main(int argc, char *argv[]) /* Announce ourselves from time to time, to make other possible schedulers disconnect their daemons if we are the preferred scheduler (daemons with version new enough should automatically select the best scheduler, but old daemons connect randomly). */ - if (last_announce + 120 < time(NULL)) { - Broadcasts::broadcastSchedulerVersion(scheduler_port, netname, starttime); + /* Broadcast nur senden, wenn mindestens zwei Daemonen angemeldet sind. Es kann nämlich sein, + dass dieser Scheduler nicht von außen erreichbar ist und dadurch kein Daemon von seiner + Existenz weiß. Broadcasts senden kann er aber. Das führt dazu dass, falls er der älteste ist, + regelmäßig alle Daemonen vom zweitältesten aber erreichbaren Scheduler rausgeworfen werden. + #65813 */ + if ( ( last_announce + 120 < time(NULL) ) && ( css.size() > 2 ) ) { + Broadcasts::broadcastSchedulerVersion(scheduler_port, netname, starttimeBroadcast); last_announce = time(NULL); } @@ -2312,7 +2388,7 @@ int main(int argc, char *argv[]) log_info() << "broadcast from " << inet_ntoa(broad_addr.sin_addr) << ":" << ntohs(broad_addr.sin_port) << " (version " << daemon_version << ")\n"; - int reply_len = DiscoverSched::prepareBroadcastReply(buf, netname, starttime); + int reply_len = DiscoverSched::prepareBroadcastReply(buf, netname, starttimeBroadcast); if (sendto(broad_fd, buf, reply_len, 0, (struct sockaddr *) &broad_addr, broad_len) != reply_len) { log_perror("sendto()"); diff --git a/services/comm.cpp b/services/comm.cpp index e79151a6f..9048f1b85 100644 --- a/services/comm.cpp +++ b/services/comm.cpp @@ -22,6 +22,8 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ +#define MSG_DEBUG + #include #include @@ -402,7 +404,7 @@ bool MsgChannel::flush_writebuf(bool blocking) } errno = send_errno; - log_perror("flush_writebuf() failed"); + log_perror(string( "flush_writebuf() failed " ) + name); error = true; break; } else if (ret == 0) { @@ -1174,6 +1176,10 @@ Msg *MsgChannel::get_msg(int timeout, bool eofAllowed) type = (enum MsgType) t; } +#ifdef MSG_DEBUG + trace() << "recv msg (" << name << "): " << Msg::readableMsgType( type ) << endl; +#endif + switch (type) { case M_UNKNOWN: set_error(); @@ -1208,6 +1214,9 @@ Msg *MsgChannel::get_msg(int timeout, bool eofAllowed) case M_JOB_DONE: m = new JobDoneMsg; break; + case M_JOB_ERROR: + m = new JobErrorMsg; + break; case M_LOGIN: m = new LoginMsg; break; @@ -1235,6 +1244,9 @@ Msg *MsgChannel::get_msg(int timeout, bool eofAllowed) case M_MON_STATS: m = new MonStatsMsg; break; + case M_MON_SCHEDULER_INFO: + m = new MonSchedulerInfoMsg; + break; case M_JOB_LOCAL_BEGIN: m = new JobLocalBeginMsg; break; @@ -1305,6 +1317,10 @@ bool MsgChannel::send_msg(const Msg &m, int flags) return false; } +#ifdef MSG_DEBUG + trace() << "send msg (" << name << "): " << Msg::readableMsgType( m.type ) << endl; +#endif + chop_output(); size_t msgtogo_old = msgtogo; @@ -1355,6 +1371,33 @@ static int get_second_port_for_debug( int port ) return secondPort ? secondPort : -1; } +void MsgChannel::shutdown_socket() +{ + // Example was + // https://blog.netherlabs.nl/articles/2009/01/18/the-ultimate-so_linger-page-or-why-is-my-tcp-not-reliable + + // Close the send channel + shutdown( fd, SHUT_WR ); + + sleep( 1 ); + + // Read all, what the other side is still sending. Not before this is finished, the other side will look if there is something to receive. + while ( true ) + { + usleep( 50000 ); + char buffer[ 10000 ]; + const int res = read( fd, buffer, sizeof( buffer ) ); + if (res <= 0 ) + { + break; + } + } + + sleep( 2 ); + + // Now the socket can be safely closed +} + void Broadcasts::broadcastSchedulerVersion(int scheduler_port, const char* netname, time_t starttime) { // Code for older schedulers than version 38. Has endianness problems, the message size @@ -1763,7 +1806,8 @@ MsgChannel *DiscoverSched::try_get_scheduler() << ":" << ntohs(remote_addr.sin_port) << " (unknown version)" << endl; } else { log_info() << "Suitable scheduler found at " << inet_ntoa(remote_addr.sin_addr) - << ":" << ntohs(remote_addr.sin_port) << " (version: " << version << ")" << endl; + << ":" << ntohs(remote_addr.sin_port) << " (version:" << version + << " start:" << start_time << ")" << endl; } if (best_version != 0) multiple = true; @@ -1923,7 +1967,7 @@ void Msg::send_to_channel(MsgChannel *c) const } GetCSMsg::GetCSMsg(const Environments &envs, const std::string &f, - CompileJob::Language _lang, unsigned int _count, + CompileJob::Language _lang, const std::string &_compiler, unsigned int _count, std::string _target, unsigned int _arg_flags, const std::string &host, int _minimal_host_version, unsigned int _required_features, @@ -1932,6 +1976,7 @@ GetCSMsg::GetCSMsg(const Environments &envs, const std::string &f, , versions(envs) , filename(f) , lang(_lang) + , compiler(_compiler) , count(_count) , target(_target) , arg_flags(_arg_flags) @@ -1946,6 +1991,49 @@ GetCSMsg::GetCSMsg(const Environments &envs, const std::string &f, minimal_host_version = max( minimal_host_version, 42 ); } +std::string Msg::readableMsgType( MsgType type ) +{ + switch ( type ) + { + case M_BLACKLIST_HOST_ENV: return "M_BLACKLIST_HOST_ENV"; + case M_COMPILE_FILE: return "M_COMPILE_FILE"; + case M_COMPILE_RESULT: return "M_COMPILE_RESULT"; + case M_CS_CONF: return "M_CS_CONF"; + case M_END: return "M_END"; + case M_FILE_CHUNK: return "M_FILE_CHUNK"; + case M_GET_CS: return "M_GET_CS"; + case M_GET_INTERNALS: return "M_GET_INTERNALS"; + case M_GET_NATIVE_ENV: return "M_GET_NATIVE_ENV"; + case M_JOB_BEGIN: return "M_JOB_BEGIN"; + case M_JOB_DONE: return "M_JOB_DONE"; + case M_JOB_ERROR: return "M_JOB_ERROR"; + case M_JOB_LOCAL_BEGIN: return "M_JOB_LOCAL_BEGIN"; + case M_JOB_LOCAL_DONE: return "M_JOB_LOCAL_DONE"; + case M_LOGIN: return "M_LOGIN"; + case M_MON_GET_CS: return "M_MON_GET_CS"; + case M_MON_JOB_BEGIN: return "M_MON_JOB_BEGIN"; + case M_MON_JOB_DONE: return "M_MON_JOB_DONE"; + case M_MON_LOCAL_JOB_BEGIN: return "M_MON_LOCAL_JOB_BEGIN"; + case M_MON_LOGIN: return "M_MON_LOGIN"; + case M_MON_STATS: return "M_MON_STATS"; + case M_MON_SCHEDULER_INFO: return "M_MON_SCHEDULER_INFO"; + case M_NATIVE_ENV: return "M_NATIVE_ENV"; + case M_NO_CS: return "M_NO_CS"; + case M_PING: return "M_PING"; + case M_STATS: return "M_STATS"; + case M_STATUS_TEXT: return "M_STATUS_TEXT"; + case M_TEXT: return "M_TEXT"; + case M_TIMEOUT: return "M_TIMEOUT"; + case M_TRANFER_ENV: return "M_TRANFER_ENV"; + case M_USE_CS: return "M_USE_CS"; + case M_VERIFY_ENV: return "M_VERIFY_ENV"; + case M_VERIFY_ENV_RESULT: return "M_VERIFY_ENV_RESULT"; + default: break; + } + + return "M_UNKNOWN"; +} + void GetCSMsg::fill_from_channel(MsgChannel *c) { Msg::fill_from_channel(c); @@ -1987,6 +2075,10 @@ void GetCSMsg::fill_from_channel(MsgChannel *c) if (IS_PROTOCOL_42(c)) { *c >> required_features; } + + if (IS_PROTOCOL_107(c)) { + *c >> compiler; + } } void GetCSMsg::send_to_channel(MsgChannel *c) const @@ -2017,6 +2109,10 @@ void GetCSMsg::send_to_channel(MsgChannel *c) const if (IS_PROTOCOL_42(c)) { *c << required_features; } + + if (IS_PROTOCOL_107(c)) { + *c << compiler; + } } void UseCSMsg::fill_from_channel(MsgChannel *c) @@ -2142,7 +2238,14 @@ void CompileFileMsg::send_to_channel(MsgChannel *c) const if (job->compilerName().find("clang") != string::npos) { // Hack for compilerwrapper. std::list flags = job->remoteFlags(); - flags.push_front("clang"); + if ( job->compilerName().find( "clang-cl" ) != string::npos ) + { + flags.push_front( "clang-cl" ); + } + else + { + flags.push_front("clang"); + } *c << flags; } else { *c << job->remoteFlags(); @@ -2173,6 +2276,11 @@ void CompileFileMsg::send_to_channel(MsgChannel *c) const // hardcoded). For clang, the binary is just clang for both C/C++. string CompileFileMsg::remote_compiler_name() const { + if ( job->compilerName().find( "clang-cl" ) != string::npos) + { + return "clang-cl"; + } + if (job->compilerName().find("clang") != string::npos) { return "clang"; } @@ -2267,16 +2375,31 @@ void JobLocalBeginMsg::fill_from_channel(MsgChannel *c) { Msg::fill_from_channel(c); *c >> stime; - *c >> outfile; + *c >> outFile; *c >> id; + if ( IS_PROTOCOL_107(c) ) + { + uint32_t lang; + + *c >> inFile; + *c >> lang; + *c >> compiler; + language = static_cast< CompileJob::Language >( lang ); + } } void JobLocalBeginMsg::send_to_channel(MsgChannel *c) const { Msg::send_to_channel(c); *c << stime; - *c << outfile; + *c << outFile; *c << id; + if ( IS_PROTOCOL_107(c) ) + { + *c << inFile; + *c << ( uint32_t ) language; + *c << compiler; + } } void JobLocalDoneMsg::fill_from_channel(MsgChannel *c) @@ -2377,6 +2500,34 @@ void JobDoneMsg::set_job_id( uint32_t jobId ) flags &= ~ (uint32_t) UnknownJobId; } +void JobErrorMsg::fill_from_channel( MsgChannel *c ) +{ + Msg::fill_from_channel( c ); + *c >> job_id; + *c >> error; + if ( IS_PROTOCOL_107( c ) ) + { + uint32_t localBuildInt = 0; + + *c >> client_id; + *c >> localBuildInt; + + localBuild = ( localBuildInt != 0); + } +} + +void JobErrorMsg::send_to_channel( MsgChannel *c ) const +{ + Msg::send_to_channel(c); + *c << job_id; + *c << error; + if ( IS_PROTOCOL_107( c ) ) + { + *c << client_id; + *c << uint32_t( localBuild ); + } +} + LoginMsg::LoginMsg(unsigned int myport, const std::string &_nodename, const std::string &_host_platform, unsigned int myfeatures) : Msg(M_LOGIN) @@ -2387,6 +2538,7 @@ LoginMsg::LoginMsg(unsigned int myport, const std::string &_nodename, const std: , nodename(_nodename) , host_platform(_host_platform) , supported_features(myfeatures) + , protocol_version(0) { #ifdef HAVE_LIBCAP_NG chroot_possible = capng_have_capability(CAPNG_EFFECTIVE, CAP_SYS_CHROOT); @@ -2414,6 +2566,7 @@ void LoginMsg::fill_from_channel(MsgChannel *c) } noremote = (net_noremote != 0); + protocol_version = c->protocol; supported_features = 0; if (IS_PROTOCOL_42(c)) { @@ -2581,6 +2734,13 @@ void MonLocalJobBeginMsg::fill_from_channel(MsgChannel *c) *c >> job_id; *c >> stime; *c >> file; + if ( IS_PROTOCOL_107(c) ) + { + uint32_t lang; + *c >> lang; + *c >> compiler; + language = static_cast< CompileJob::Language >( lang ); + } } void MonLocalJobBeginMsg::send_to_channel(MsgChannel *c) const @@ -2590,6 +2750,11 @@ void MonLocalJobBeginMsg::send_to_channel(MsgChannel *c) const *c << job_id; *c << stime; *c << shorten_filename(file); + if ( IS_PROTOCOL_107(c) ) + { + *c << ( uint32_t ) language; + *c << compiler; + } } void MonStatsMsg::fill_from_channel(MsgChannel *c) @@ -2606,6 +2771,26 @@ void MonStatsMsg::send_to_channel(MsgChannel *c) const *c << statmsg; } +void MonSchedulerInfoMsg::fill_from_channel( MsgChannel *c ) +{ + Msg::fill_from_channel( c ); + uint32_t st; + + *c >> protocolVersion; + *c >> st; + *c >> monitors; + + startTime = st; +} + +void MonSchedulerInfoMsg::send_to_channel( MsgChannel *c ) const +{ + Msg::send_to_channel(c); + *c << protocolVersion; + *c << ( uint64_t ) startTime; + *c << monitors; +} + void TextMsg::fill_from_channel(MsgChannel *c) { c->read_line(text); @@ -2620,12 +2805,14 @@ void StatusTextMsg::fill_from_channel(MsgChannel *c) { Msg::fill_from_channel(c); *c >> text; + log_error() << "Received status: " << text << endl; } void StatusTextMsg::send_to_channel(MsgChannel *c) const { Msg::send_to_channel(c); *c << text; + log_error() << "Sending status: " << text << endl; } void VerifyEnvMsg::fill_from_channel(MsgChannel *c) diff --git a/services/comm.h b/services/comm.h index a18f651ba..2b246c686 100644 --- a/services/comm.h +++ b/services/comm.h @@ -36,7 +36,8 @@ #include "job.h" // if you increase the PROTOCOL_VERSION, add a macro below and use that -#define PROTOCOL_VERSION 42 +//#define PROTOCOL_VERSION 42 +#define PROTOCOL_VERSION 108 // if you increase the MIN_PROTOCOL_VERSION, comment out macros below and clean up the code #define MIN_PROTOCOL_VERSION 21 @@ -67,6 +68,8 @@ #define IS_PROTOCOL_40(c) ((c)->protocol >= 40) #define IS_PROTOCOL_41(c) ((c)->protocol >= 41) #define IS_PROTOCOL_42(c) ((c)->protocol >= 42) +#define IS_PROTOCOL_107(c) ((c)->protocol >= 107) +#define IS_PROTOCOL_108(c) ((c)->protocol >= 108) // Terms used: // S = scheduler @@ -140,7 +143,9 @@ enum MsgType { // C --> CS, CS --> S (forwarded from C), to not use given host for given environment M_BLACKLIST_HOST_ENV, // S --> CS - M_NO_CS + M_NO_CS, + M_JOB_ERROR, + M_MON_SCHEDULER_INFO, }; enum Compression { @@ -168,6 +173,8 @@ class Msg virtual void fill_from_channel(MsgChannel *c); virtual void send_to_channel(MsgChannel *c) const; + static std::string readableMsgType( MsgType ); + enum MsgType type; }; @@ -191,6 +198,7 @@ class MsgChannel // false <--> error (msg not send) bool send_msg(const Msg &, int SendFlags = SendBlocking); + void shutdown_socket(); bool has_msg(void) const { @@ -421,7 +429,7 @@ class GetCSMsg : public Msg , client_count(0) {} GetCSMsg(const Environments &envs, const std::string &f, - CompileJob::Language _lang, unsigned int _count, + CompileJob::Language _lang, const std::string &_compiler, unsigned int _count, std::string _target, unsigned int _arg_flags, const std::string &host, int _minimal_host_version, unsigned int _required_features, @@ -433,6 +441,7 @@ class GetCSMsg : public Msg Environments versions; std::string filename; CompileJob::Language lang; + std::string compiler; uint32_t count; // the number of UseCS messages to answer with - usually 1 std::string target; uint32_t arg_flags; @@ -680,16 +689,23 @@ class JobDoneMsg : public Msg class JobLocalBeginMsg : public Msg { public: - JobLocalBeginMsg(int job_id = 0, const std::string &file = "") + JobLocalBeginMsg(int job_id = 0, const std::string &infile = "", const std::string &outfile = "", + CompileJob::Language lang = CompileJob::Lang_Custom, const std::string &comp = "") : Msg(M_JOB_LOCAL_BEGIN) - , outfile(file) + , inFile(infile) + , outFile(outfile) + , language(lang) + , compiler(comp) , stime(time(0)) , id(job_id) {} virtual void fill_from_channel(MsgChannel *c); virtual void send_to_channel(MsgChannel *c) const; - std::string outfile; + std::string inFile; + std::string outFile; + CompileJob::Language language; + std::string compiler; uint32_t stime; uint32_t id; }; @@ -707,6 +723,28 @@ class JobLocalDoneMsg : public Msg uint32_t job_id; }; +class JobErrorMsg : public Msg +{ +public: + JobErrorMsg() : + Msg( M_JOB_ERROR ) + {} + + JobErrorMsg( unsigned int id, bool, const std::string &err ) : + Msg( M_JOB_ERROR ), + job_id( id ), + error( err ) + {} + + virtual void fill_from_channel( MsgChannel *c ); + virtual void send_to_channel( MsgChannel *c ) const; + + uint32_t job_id = 0; + uint32_t client_id = 0; + bool localBuild; + std::string error; +}; + class LoginMsg : public Msg { public: @@ -727,6 +765,7 @@ class LoginMsg : public Msg std::string nodename; std::string host_platform; uint32_t supported_features; // bitmask of various features the node supports + int protocol_version; }; class ConfCSMsg : public Msg @@ -822,7 +861,7 @@ class MonGetCSMsg : public GetCSMsg } MonGetCSMsg(int jobid, int hostid, GetCSMsg *m) - : GetCSMsg(Environments(), m->filename, m->lang, 1, m->target, 0, std::string(), false, m->client_count) + : GetCSMsg(Environments(), m->filename, m->lang, m->compiler, 1, m->target, 0, std::string(), false, m->client_count) , job_id(jobid) , clientid(hostid) { @@ -881,12 +920,15 @@ class MonLocalJobBeginMsg : public Msg MonLocalJobBeginMsg() : Msg(M_MON_LOCAL_JOB_BEGIN) {} - MonLocalJobBeginMsg(unsigned int id, const std::string &_file, unsigned int time, int _hostid) + MonLocalJobBeginMsg(unsigned int id, const std::string &_file, CompileJob::Language lang, const std::string &comp, + unsigned int time, int _hostid) : Msg(M_MON_LOCAL_JOB_BEGIN) , job_id(id) , stime(time) , hostid(_hostid) - , file(_file) {} + , file(_file) + , language(lang) + , compiler(comp) {} virtual void fill_from_channel(MsgChannel *c); virtual void send_to_channel(MsgChannel *c) const; @@ -895,6 +937,8 @@ class MonLocalJobBeginMsg : public Msg uint32_t stime; uint32_t hostid; std::string file; + CompileJob::Language language; + std::string compiler; }; class MonStatsMsg : public Msg @@ -915,6 +959,27 @@ class MonStatsMsg : public Msg std::string statmsg; }; +class MonSchedulerInfoMsg : public Msg +{ +public: + MonSchedulerInfoMsg() : + Msg( M_MON_SCHEDULER_INFO ) + {} + + MonSchedulerInfoMsg( time_t st, uint32_t mons ) : + Msg( M_MON_SCHEDULER_INFO ), + startTime( st ), + monitors( mons ) + {} + + virtual void fill_from_channel( MsgChannel *c ); + virtual void send_to_channel( MsgChannel *c ) const; + + uint32_t protocolVersion = PROTOCOL_VERSION; + time_t startTime = 0; + uint32_t monitors = 0; +}; + class TextMsg : public Msg { public: diff --git a/services/logging.cpp b/services/logging.cpp index c020e261a..72ba0abb0 100644 --- a/services/logging.cpp +++ b/services/logging.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #ifdef __linux__ #include #endif @@ -117,7 +118,8 @@ void setup_debug(int level, const string &filename, const string &prefix) } } - setenv("SEGFAULT_OUTPUT_NAME", fname.c_str(), false); + string segv_fname = fname + ".segv"; + setenv("SEGFAULT_OUTPUT_NAME", segv_fname.c_str(), false); #endif output = &logfile_file; } else if( const char* ccache_err_fd = getenv( "UNCACHED_ERR_FD" )) { @@ -223,4 +225,23 @@ void flush_debug() } } +std::ostream & log_backtrace() +{ + void *array[ 1000 ]; + + size_t size = backtrace( array, 1000 ); + char **strings = backtrace_symbols (array, size); + + trace() << "Backtrace begin" << std::endl; + + for ( size_t xx = 0; xx < size; xx++) + { + trace() << strings[ xx ] << std::endl; + } + + free( strings ); + + return trace() << "Backtrace end" << std::endl; +} + unsigned log_block::nesting; diff --git a/services/logging.h b/services/logging.h index 4024bcf60..020182e21 100644 --- a/services/logging.h +++ b/services/logging.h @@ -133,6 +133,8 @@ static inline std::ostream & log_perror_trace(const char *prefix) return log_errno_trace(prefix, errno); } +std::ostream & log_backtrace(); + class log_block { static unsigned nesting; diff --git a/tests/test.sh b/tests/test.sh index e1d776b0d..27a5bdc48 100755 --- a/tests/test.sh +++ b/tests/test.sh @@ -2225,7 +2225,8 @@ else skipped_tests="$skipped_tests zero_local_jobs_test" fi -if test -z "$chroot_disabled"; then +# This tests don't work for us because of code changes at switching to another scheduler (isoletad scheduler) +if false && test -z "$chroot_disabled"; then echo Testing different netnames. reset_logs remote "Different netnames" stop_ice 1