diff --git a/Andor/CMakeLists.txt b/Andor/CMakeLists.txt index 973fea2e..1af81c1a 100644 --- a/Andor/CMakeLists.txt +++ b/Andor/CMakeLists.txt @@ -10,7 +10,7 @@ set( ANDOR_DIR ${PROJECT_BASE_DIR}/Andor ) set( CMAKE_CXX_STANDARD 17 ) -add_definitions( -Wall -ansi -O0 -Wno-variadic-macros -ggdb ) +add_definitions( -Wall -O0 -Wno-variadic-macros -ggdb ) add_definitions(-D_LP64) include_directories( ${PROJECT_BASE_DIR}/utils ) @@ -30,12 +30,14 @@ find_library( CFITS_LIB cfitsio NAMES libcfitsio PATHS /usr/local/lib ) find_path( PYTHON_DEV "Python.h" PATHS /usr/include/python3.9 ) find_library( PYTHON_LIB python3.9 NAMES libpython3.9 PATHS /usr/lib64 ) include_directories( ${PYTHON_DEV} ) + add_link_options( -L/usr/lib64 -lpython3.9 -lcrypt -lpthread -ldl -lutil -lm -lm ) +include_directories( ${PYTHON_DEV} ) + add_library( andor STATIC ${ANDOR_DIR}/andor.cpp ${ANDOR_DIR}/andor_emulator.cpp - ${PYTHON_DEV} ) target_link_libraries( andor diff --git a/Andor/andor_emulator.cpp b/Andor/andor_emulator.cpp index dc14fc93..df7a0e9d 100644 --- a/Andor/andor_emulator.cpp +++ b/Andor/andor_emulator.cpp @@ -1218,7 +1218,7 @@ namespace Andor { } #ifdef LOGLEVEL_DEBUG -// log_python_arguments(pFunction, pArgs, pKwArgs); + log_python_arguments(pFunction, pArgs, pKwArgs); #endif // Call the Python function here diff --git a/CMakeLists.txt b/CMakeLists.txt index 27ac4ab1..a963f06b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ # @author David Hale # ---------------------------------------------------------------------------- -add_definitions(-DLOGLEVEL_DEBUG) +#add_definitions(-DLOGLEVEL_DEBUG) cmake_minimum_required( VERSION 3.12 ) diff --git a/Config/acamd.cfg.in b/Config/acamd.cfg.in index ce354d8a..3007f567 100644 --- a/Config/acamd.cfg.in +++ b/Config/acamd.cfg.in @@ -169,14 +169,11 @@ ACQUIRE_TCS_MAX_OFFSET=60 # the maximum allowable offset sent to the TCS, in SKYSIM_IMAGE_SIZE=1024 # ----------------------------------------------------------------------------- -# TELEM_PROVIDER=( ) +# SUBSCRIBE_TO=( ) # -# This is a list of telemetry providers where is the daemon name, -# and is the port on which to send the telemetry request. +# This is a list of pub/sub sources to subscribe to, where is the daemon +# name and is its ZeroMQ publish endpoint. # Provide one per line. # -TELEM_PROVIDER=(tcsd @TCSD_NB_PORT@) -TELEM_PROVIDER=(sequencerd @SEQUENCERD_NB_PORT@) -# SUBSCRIBE_TO=(tcsd "tcp://127.0.0.1:@TCSD_PUB_PORT@") SUBSCRIBE_TO=(sequencerd "tcp://127.0.0.1:@SEQUENCERD_PUB_PORT@") diff --git a/Config/camerad.cfg.in b/Config/camerad.cfg.in index e90faefd..10ff478a 100644 --- a/Config/camerad.cfg.in +++ b/Config/camerad.cfg.in @@ -155,20 +155,11 @@ ACTIVATE_COMMANDS=(G PON, ERS 1000 1000, EPG 500, CLR) ACTIVATE_COMMANDS=(U PON, CLR) # ----------------------------------------------------------------------------- -# TELEM_PROVIDER=( ) -# -# This is a list of telemetry providers where is the daemon name, -# and is the port on which to send the telemetry request. These are the -# sources for telemetry information for FITS headers. Provide one per line. -# -TELEM_PROVIDER=(calibd @CALIBD_NB_PORT@) -TELEM_PROVIDER=(flexured @FLEXURED_NB_PORT@) -TELEM_PROVIDER=(focusd @FOCUSD_NB_PORT@) -TELEM_PROVIDER=(powerd @POWERD_NB_PORT@) -TELEM_PROVIDER=(sequencerd @SEQUENCERD_NB_PORT@) -TELEM_PROVIDER=(slitd @SLITD_NB_PORT@) -TELEM_PROVIDER=(tcsd @TCSD_NB_PORT@) -TELEM_PROVIDER=(thermald @THERMALD_NB_PORT@) +# SUBSCRIBE_TO=( ) +# +# This is a list of pub/sub sources to subscribe to, where is the daemon +# name and is its ZeroMQ publish endpoint. These are the sources for +# telemetry information for FITS headers. Provide one per line. # SUBSCRIBE_TO=(calibd "tcp://127.0.0.1:@CALIBD_PUB_PORT@") SUBSCRIBE_TO=(flexured "tcp://127.0.0.1:@FLEXURED_PUB_PORT@") diff --git a/Config/flexured.cfg.in b/Config/flexured.cfg.in index c71cbe23..f6aaebde 100644 --- a/Config/flexured.cfg.in +++ b/Config/flexured.cfg.in @@ -7,7 +7,13 @@ NBPORT=@FLEXURED_NB_PORT@ # slitd server non-blocking port DAEMON=no # run as daemon? ASYNCPORT=@MESSAGEPORT@ # asynchronous message port ASYNCGROUP=239.1.1.234 # asynchronous message broadcast group -PUBLISHER_PORT="tcp://127.0.0.1:@FLEXURED_PUB_PORT@" # my zeromq pub port + +# Message pub/sub +# PUB_ENDPOINT=tcp://127.0.0.1: +# SUB_ENDPOINT=tcp://127.0.0.1: +# +PUB_ENDPOINT="tcp://127.0.0.1:@MESSAGE_BROKER_SUB_PORT@" +SUB_ENDPOINT="tcp://127.0.0.1:@MESSAGE_BROKER_PUB_PORT@" # this is the port number that the emulator listens to # @@ -61,14 +67,11 @@ MOTOR_AXIS="I 3 -1000 1000 0 na 300.0" # # ----------------------------------------------------------------------------- -# TELEM_PROVIDER=( ) +# SUBSCRIBE_TO=( ) # -# This is a list of telemetry providers where is the daemon name, -# and is the port on which to send the telemetry request. +# This is a list of pub/sub sources to subscribe to, where is the daemon +# name and is its ZeroMQ publish endpoint. # Provide one per line. # -TELEM_PROVIDER=(tcsd @TCSD_NB_PORT@) -TELEM_PROVIDER=(thermald @THERMALD_NB_PORT@) -# SUBSCRIBE_TO=(tcsd "tcp://127.0.0.1:@TCSD_PUB_PORT@") SUBSCRIBE_TO=(thermald "tcp://127.0.0.1:@THERMALD_PUB_PORT@") diff --git a/Config/sequencerd.cfg.in b/Config/sequencerd.cfg.in index 5a9cbbce..69d2f9a2 100644 --- a/Config/sequencerd.cfg.in +++ b/Config/sequencerd.cfg.in @@ -87,7 +87,7 @@ CAMERA_EPILOGUE=(close) # slit init and shutdown states # formatted as (width offset) # -SLIT__INIT=(0.5 3.0) +SLIT__INIT=(0.4 3.0) SLIT__SHUTDOWN= # ACAM init and shutdown states @@ -107,7 +107,7 @@ CALIB_DOOR__SHUTDOWN=close # Virtual Slit Mode slit offset positions # units are arcseconds # -VIRTUAL_SLITW_ACQUIRE=0.5 # slit width during acquire +VIRTUAL_SLITW_ACQUIRE=0.4 # slit width during acquire VIRTUAL_SLITO_ACQUIRE=-3.0 # slit offset for acquiring target VIRTUAL_SLITO_EXPOSE=3.0 # slit offset for science exposure @@ -162,7 +162,7 @@ ACQUIRE_MIN_REPEAT=2 # minimum number of sequential successful a ACQUIRE_TCS_MAX_OFFSET=60 # the maximum allowable offset sent to the TCS, in arcsec # Calibration Settings -# CAL_TARGET=(name caldoor calcover U G R I lampthar lampfear lampbluc lampredc lolamp hilamp mod1 mod2 ... mod6) +# CAL_TARGET=(name caldoor calcover U G R I lampthar lampfear lampbluc lampredc lolamp hilamp mod1 mod2 ... mod6 imgtype) # # where name must be "DEFAULT" or start with "CAL_" # caldoor = open | close @@ -170,21 +170,22 @@ ACQUIRE_TCS_MAX_OFFSET=60 # the maximum allowable offset sent to the # U,G,R,I = on | off # indicates which channels to enable/disable # lamp* = on | off # lamp power # mod* = on | off # lamp modulator -# for a total of 19 required parameters +# imgtype = # FITS IMGTYPE keyword +# for a total of 20 required parameters # name=SCIENCE defines science target operation # -# name door cover U G R I thar fear bluc redc llmp hlmp mod1 mod2 mod3 mod4 mod5 mod6 -CAL_TARGET=(CAL_THAR open close on on on on on on on on off off off off off off off on ) -CAL_TARGET=(CAL_FEAR open close on on on on on on on on off off on off off off off off) -CAL_TARGET=(CAL_THAR_UG open close on on off off on on on on off off off off off off off on ) -CAL_TARGET=(CAL_FEAR_UG open close on on off off on on on on off off on off off off off off) -CAL_TARGET=(CAL_CONTR open close on on on on on on on on off off off off off on off off) -CAL_TARGET=(CAL_CONTB open close on on on on on on on on off off off off off off on off) -CAL_TARGET=(CAL_DOME close open on on on on off off off off off on off off off off off off) -CAL_TARGET=(CAL_DOME_UG close open on on off off off off off off off on off off off off off off) -CAL_TARGET=(CAL_BIAS close close on on on on off off off off off off off off off off off off) -CAL_TARGET=(CAL_DARK close close on on on on off off off off off off off off off off off off) -CAL_TARGET=(SCIENCE close open on on on on off off off off off off off off off off off off) +# name door cover U G R I thar fear bluc redc llmp hlmp mod1 mod2 mod3 mod4 mod5 mod6 imgtype +CAL_TARGET=(CAL_THAR open close on on on on on on on on off off off off off off off on THAR ) +CAL_TARGET=(CAL_FEAR open close on on on on on on on on off off on off off off off off FEAR ) +CAL_TARGET=(CAL_THAR_UG open close on on off off on on on on off off off off off off off on THAR ) +CAL_TARGET=(CAL_FEAR_UG open close on on off off on on on on off off on off off off off off FEAR ) +CAL_TARGET=(CAL_CONTR open close on on on on on on on on off off off off off on off off CONT ) +CAL_TARGET=(CAL_CONTB open close on on on on on on on on off off off off off off on off CONT ) +CAL_TARGET=(CAL_DOME close open on on on on off off off off off on off off off off off off DOMEFLAT) +CAL_TARGET=(CAL_DOME_UG close open on on off off off off off off off on off off off off off off DOMEFLAT) +CAL_TARGET=(CAL_BIAS close close on on on on off off off off off off off off off off off off BIAS ) +CAL_TARGET=(CAL_DARK close close on on on on off off off off off off off off off off off off DARK ) +CAL_TARGET=(SCIENCE close open on on on on off off off off off off off off off off off off SCI ) # miscellaneous # diff --git a/Config/thermald.cfg.in b/Config/thermald.cfg.in index 3fa71973..e712f199 100644 --- a/Config/thermald.cfg.in +++ b/Config/thermald.cfg.in @@ -7,7 +7,13 @@ NBPORT=@THERMALD_NB_PORT@ # thermald server non-blocking port DAEMON=yes # run as daemon? ASYNCPORT=@MESSAGEPORT@ # asynchronous message port ASYNCGROUP=239.1.1.234 # asynchronous message broadcast group -PUBLISHER_PORT="tcp://127.0.0.1:@THERMALD_PUB_PORT@" # my zeromq pub port + +# Message pub/sub +# PUB_ENDPOINT=tcp://127.0.0.1: +# SUB_ENDPOINT=tcp://127.0.0.1: +# +PUB_ENDPOINT="tcp://127.0.0.1:@MESSAGE_BROKER_SUB_PORT@" +SUB_ENDPOINT="tcp://127.0.0.1:@MESSAGE_BROKER_PUB_PORT@" # database configuration # @@ -102,15 +108,12 @@ CAMP_CHAN="22 TFLEXCON_UR" CAMP_CHAN="23 TACAM" # ----------------------------------------------------------------------------- -# TELEM_PROVIDER=( ) +# SUBSCRIBE_TO=( ) # -# This is a list of telemetry providers where is the daemon name, -# and is the port on which to send the telemetry request. +# This is a list of pub/sub sources to subscribe to, where is the daemon +# name and is its ZeroMQ publish endpoint. # Provide one per line. # -TELEM_PROVIDER=(acamd @ACAMD_NB_PORT@) -TELEM_PROVIDER=(slicecamd @SLICECAMD_NB_PORT@) -# SUBSCRIBE_TO=(acamd "tcp://127.0.0.1:@ACAMD_PUB_PORT@") SUBSCRIBE_TO=(slicecamd "tcp://127.0.0.1:@SLICECAMD_PUB_PORT@") diff --git a/LKS/lks.cpp b/LKS/lks.cpp index 88d7778e..f3423cc3 100644 --- a/LKS/lks.cpp +++ b/LKS/lks.cpp @@ -172,9 +172,9 @@ namespace LKS { long retval=0; #ifdef LOGLEVEL_DEBUG -// message << "[DEBUG] send to LKS " << this->model << " (" << this->name << ") on socket " -// << this->sock.gethost() << "/" << this->sock.getport() << ": " << cmd; -// logwrite( function, message.str() ); + message << "[DEBUG] send to LKS " << this->model << " (" << this->name << ") on socket " + << this->sock.gethost() << "/" << this->sock.getport() << ": " << cmd; + logwrite( function, message.str() ); #endif std::lock_guard lock( this->mtx ); @@ -228,10 +228,6 @@ namespace LKS { retstring = reply; -#ifdef LOGLEVEL_DEBUG -// message << " reply=" << reply; logwrite( function, message.str() ); -#endif - return( error ); } /***** LKS::Interface::send_command *****************************************/ diff --git a/PI/CMakeLists.txt b/PI/CMakeLists.txt index cb6e897d..abb08161 100644 --- a/PI/CMakeLists.txt +++ b/PI/CMakeLists.txt @@ -10,7 +10,7 @@ set( PI_DIR ${PROJECT_BASE_DIR}/PI ) set( CMAKE_CXX_STANDARD 17 ) -add_definitions( -Wall -ansi -O1 -Wno-variadic-macros -ggdb ) +add_definitions( -Wall -O1 -Wno-variadic-macros -ggdb ) include_directories( ${PROJECT_BASE_DIR}/utils ) include_directories( ${PROJECT_BASE_DIR}/common ) diff --git a/PI/pi.cpp b/PI/pi.cpp index e8cf567a..c892ba66 100644 --- a/PI/pi.cpp +++ b/PI/pi.cpp @@ -424,6 +424,11 @@ namespace Physik_Instrumente { this->thread_error.store( NO_ERROR ); // initialize the thread_error state. + // vector of threads to perform the operation for each motor + // + std::vector workers; + workers.reserve(motornames.size()); + for ( size_t n=0; n < motornames.size(); n++ ) { auto name = motornames[n]; @@ -434,16 +439,15 @@ namespace Physik_Instrumente { // Spawn a thread to performm the move. // If there is more than one then they can be done in parallel. // - std::thread( _dothread_moveto, std::ref( *this ), name, addr, axis, position ).detach(); - this->motors_running++; + try { + workers.emplace_back( _dothread_moveto, std::ref( *this ), name, addr, axis, position ); + } + catch(...) { for ( auto &t : workers ) t.join(); throw; } } // wait for the threads to finish - // TODO add a way to abort this // - while ( this->motors_running != 0 ) { - std::this_thread::sleep_for( std::chrono::milliseconds( 1 ) ); - } + for ( auto &t : workers ) t.join(); logwrite( function, "move(s) complete" ); @@ -685,8 +689,6 @@ namespace Physik_Instrumente { iface.thread_error.fetch_or( error ); // preserve any error returned - --iface.motors_running; // atomically decrement the number of motors waiting - message.str(""); message << "completed move " << name << ( error!=NO_ERROR ? " with error" : "" ); logwrite( function, message.str() ); @@ -811,6 +813,11 @@ namespace Physik_Instrumente { // this->thread_error.store( NO_ERROR ); + // vector of threads to perform the operation for each motor + // + std::vector workers; + workers.reserve(name_list.size()); + // Now loop through the built up list of motor names // for ( const auto &name : name_list ) { @@ -823,22 +830,22 @@ namespace Physik_Instrumente { message << " }"; logwrite( function, message.str() ); retstring="unknown_motor"; + for ( auto &t : workers ) t.join(); // join any workers spawned before returning return ERROR; } // Spawn a thread to performm the home move. // If there is more than one then they can be done in parallel. // - std::thread( _dothread_home, std::ref( *this ), name ).detach(); - this->motors_running++; + try { + workers.emplace_back( _dothread_home, std::ref( *this ), name ); + } + catch(...) { for ( auto &t : workers ) t.join(); throw; } } // wait for the threads to finish - // TODO add a way to abort this // - while ( this->motors_running != 0 ) { - std::this_thread::sleep_for( std::chrono::milliseconds( 1 ) ); - } + for ( auto &t : workers ) t.join(); logwrite( function, "home complete" ); @@ -878,14 +885,12 @@ namespace Physik_Instrumente { message.str(""); message << "ERROR: name \"" << name << "\" not in motormap: " << e.what(); logwrite( function, message.str() ); iface.thread_error.fetch_or( ERROR ); // preserve this error - --iface.motors_running; // atomically decrement the number of motors waiting return; } if ( reftype.empty() ) { message.str(""); message << "NOTICE referencing not available for " << name; logwrite( function, message.str() ); - --iface.motors_running; // atomically decrement the number of motors waiting return; } @@ -937,8 +942,6 @@ namespace Physik_Instrumente { iface.thread_error.fetch_or( error ); // preserve any error returned - --iface.motors_running; // atomically decrement the number of motors waiting - message.str(""); message << "completed home " << name << ( error!=NO_ERROR ? " with error" : "" ); logwrite( function, message.str() ); diff --git a/PI/pi.h b/PI/pi.h index f452ae45..837d65d8 100644 --- a/PI/pi.h +++ b/PI/pi.h @@ -422,7 +422,6 @@ namespace Physik_Instrumente { int move_timeout; int home_timeout; float tolerance; - volatile std::atomic motors_running; volatile std::atomic thread_error; std::map, Network::TcpSocket> socketmap; std::map> motormap; @@ -555,13 +554,13 @@ namespace Physik_Instrumente { // Interface() : initialized(false), name(""), pi_mutex(std::make_unique()), move_timeout(60000), home_timeout(60000), tolerance(0.001), - motors_running(0), thread_error(NO_ERROR) { } + thread_error(NO_ERROR) { } // Constructor initializes move and home timeouts // Interface( int TO_move, int TO_home, float tol ) : initialized(false), name(""), pi_mutex(std::make_unique()), move_timeout(TO_move), home_timeout(TO_home), tolerance(tol), - motors_running(0), thread_error(NO_ERROR) { } + thread_error(NO_ERROR) { } // Copy constructor // @@ -571,7 +570,6 @@ namespace Physik_Instrumente { move_timeout(other.move_timeout), home_timeout(other.home_timeout), tolerance(other.tolerance), - motors_running(other.motors_running), thread_error(other.thread_error), socketmap(other.socketmap), motormap(other.motormap) {} @@ -586,7 +584,6 @@ namespace Physik_Instrumente { move_timeout = other.move_timeout; home_timeout = other.home_timeout; tolerance = other.tolerance; - motors_running = other.motors_running; thread_error = other.thread_error; socketmap = other.socketmap; motormap = other.motormap; @@ -602,7 +599,6 @@ namespace Physik_Instrumente { move_timeout(std::move(other.move_timeout)), home_timeout(std::move(other.home_timeout)), tolerance(std::move(other.tolerance)), - motors_running(std::move(other.motors_running)), thread_error(std::move(other.thread_error)), socketmap(std::move(other.socketmap)), motormap(std::move(other.motormap)) { other.initialized = false; } @@ -617,12 +613,10 @@ namespace Physik_Instrumente { move_timeout = std::move( other.move_timeout ); home_timeout = std::move( other.home_timeout ); tolerance = std::move( other.tolerance ); - motors_running = std::move( other.motors_running ); thread_error = std::move( other.thread_error ); socketmap = std::move( other.socketmap ); motormap = std::move( other.motormap ); other.initialized = false; - other.motors_running = 0; other.thread_error = NO_ERROR; } return *this; diff --git a/acamd/acam_interface.cpp b/acamd/acam_interface.cpp index 71ab89c7..f8ef8eeb 100644 --- a/acamd/acam_interface.cpp +++ b/acamd/acam_interface.cpp @@ -1183,11 +1183,6 @@ namespace Acam { Py_DECREF( pArgList ); Py_DECREF( pKeywords ); -//#ifdef LOGLEVEL_DEBUG -// message.str(""); message << "[DEBUG] Python call time " << (t1-t0) << " sec"; -// logwrite( function, message.str() ); -//#endif - // Check the return values from Python here // if ( !pReturn ) { @@ -1427,32 +1422,9 @@ namespace Acam { * */ void Interface::publish_snapshot() { - // force-publish status + // emit state on Topic::ACAMD and temperature on Topic::ACAMD_TEMP this->publish_status(true); - - nlohmann::json jmessage_out; - jmessage_out[Key::SOURCE] = Topic::ACAMD; - - int ccdtemp=99; - this->camera.andor.get_temperature( ccdtemp ); // temp is int - jmessage_out[Key::Acamd::TANDOR] = ( this->isopen("camera") ? - static_cast(ccdtemp) : // but the database wants floats - NAN ); - jmessage_out[Key::Acamd::FILTER] = ( this->isopen("motion" ) ? - this->motion.get_current_filtername() : - "not_connected" ); - jmessage_out[Key::Acamd::COVER] = ( this->isopen("motion" ) ? - this->motion.get_current_coverpos() : - "not_connected" ); - - try { - this->publisher->publish( jmessage_out, Topic::SNAPSHOT ); - } - catch ( const std::exception &e ) { - logwrite( "Acam::Interface::publish_snapshot", - "ERROR publishing message: "+std::string(e.what()) ); - return; - } + this->publish_temperature(); } /***** Acam::Interface::publish_snapshot ************************************/ @@ -1507,6 +1479,41 @@ namespace Acam { /***** Acam::Interface::publish_status **************************************/ + /***** Acam::Interface::publish_temperature ********************************/ + /** + * @brief publish only the andor CCD temperature on Topic::ACAMD_TEMP + * @details Published on a fixed interval (see acamd.cpp), not on change, + * since the CCD temperature varies continuously. When the camera + * is not open the thread stays alive but publishes NaN instead + * of attempting a hardware read; this lets the thread resume + * publishing real values when the camera comes back online + * without the get_temperature() error-log spam each cycle. + * + */ + void Interface::publish_temperature() { + nlohmann::json jmessage; + jmessage[Key::SOURCE] = Topic::ACAMD; + + if ( this->isopen("camera") ) { + int ccdtemp=99; + this->camera.andor.get_temperature( ccdtemp ); + jmessage[Key::Acamd::TANDOR] = static_cast(ccdtemp); // database wants float + } + else { + jmessage[Key::Acamd::TANDOR] = NAN; + } + + try { + this->publisher->publish( jmessage, Topic::ACAMD_TEMP ); + } + catch ( const std::exception &e ) { + logwrite( "Acam::Interface::publish_temperature", + "ERROR publishing message: "+std::string(e.what()) ); + } + } + /***** Acam::Interface::publish_temperature ********************************/ + + /***** Acam::Interface::request_snapshot ************************************/ /** * @brief [obsolete] publises request for snapshot @@ -1639,11 +1646,11 @@ namespace Acam { * */ void Interface::handletopic_targetinfo( const nlohmann::json &jmessage ) { - this->database.add_from_json( jmessage, "OBS_ID" ); - this->database.add_from_json( jmessage, "NAME" ); - this->database.add_from_json( jmessage, "POINTMODE" ); - this->database.add_from_json( jmessage, "RA" ); - this->database.add_from_json( jmessage, "DECL" ); + this->database.add_from_json( jmessage, Key::TargetInfo::OBS_ID ); + this->database.add_from_json( jmessage, Key::TargetInfo::NAME ); + this->database.add_from_json( jmessage, Key::TargetInfo::POINTMODE ); + this->database.add_from_json( jmessage, Key::TargetInfo::RA ); + this->database.add_from_json( jmessage, Key::TargetInfo::DECL ); } /***** Acam::Interface::handletopic_targetinfo ******************************/ @@ -3344,6 +3351,7 @@ logwrite( function, message.str() ); // if ( requested_mode == Acam::TARGET_NOP ) { this->stop_acquisition.store( true, std::memory_order_release ); + this->is_acquired.store( false, std::memory_order_release ); // target no longer acquired when stopped logwrite( function, "stop requested" ); } @@ -4078,17 +4086,17 @@ logwrite( function, message.str() ); // BoolState shutting_down( this->is_shutting_down ); - // close the cover (if motion is in use) - // - if ( this->motion.is_open() ) error |= this->motion.cover( "close", dontcare ); - - // diable target acquisition + // Stop target acquisition and the framegrab thread FIRST so the guider + // stops sending pt_offsets to TCS during the slow cover-close that + // follows. TCS is shut down at the orchestration layer after acamd + // finishes (see Sequencer::Sequence::shutdown phase split). // error |= this->acquire( "stop", dontcare); + error |= this->framegrab( "stop", dontcare ); - // stop the framegrab thread + // close the cover (if motion is in use) - this is the slow step // - error |= this->framegrab( "stop", dontcare ); + if ( this->motion.is_open() ) error |= this->motion.cover( "close", dontcare ); // request stop the focus monitor // @@ -4099,6 +4107,12 @@ logwrite( function, message.str() ); // error |= this->close( "all", dontcare ); + // publish post-shutdown state so subscribers see is_acquired=false / + // acquire_mode="stopped" before publishing stops; forced to bypass + // publish_status's change-detect early-return. + // + this->publish_status(true); + if ( error == NO_ERROR ) logwrite( function, "acam interfaces shut down" ); else logwrite( function, "ERROR shutting down acam interfaces" ); diff --git a/acamd/acam_interface.h b/acamd/acam_interface.h index 04e29eb8..9793d863 100644 --- a/acamd/acam_interface.h +++ b/acamd/acam_interface.h @@ -540,8 +540,6 @@ namespace Acam { std::vector db_info; ///< info for constructing telemetry Database object - std::map telemetry_providers; ///< map of port[daemon_name] for external telemetry providers - struct { std::string tcsname; bool is_tcs_open; @@ -664,9 +662,9 @@ namespace Acam { long bin( std::string args, std::string &retstring ); void publish_snapshot(); void publish_status(bool force=false); + void publish_temperature(); ///< publish only the andor temperature on Topic::ACAMD (periodic) void request_snapshot(); bool wait_for_snapshots(); - long handle_json_message( std::string message_in ); long initialize_python_objects(); /// provides interface to initialize all Python modules for objects in this class long test_image(); /// long open( std::string args, std::string &help); /// wrapper to open all acam-related hardware components diff --git a/acamd/acam_server.cpp b/acamd/acam_server.cpp index 082cf6b5..85d5444c 100644 --- a/acamd/acam_server.cpp +++ b/acamd/acam_server.cpp @@ -365,30 +365,6 @@ namespace Acam { applied++; } - // TELEM_PROVIDER : contains daemon name and port to contact for header telemetry info - // (these don't get counted with "applied++") - // - if ( config.param[entry] == "TELEM_PROVIDER" ) { - std::vector tokens; - Tokenize( config.arg[entry], tokens, " " ); - try { - if ( tokens.size() == 2 ) { - this->interface.telemetry_providers[tokens.at(0)] = std::stod(tokens.at(1)); - } - else { - message.str(""); message << "ERROR bad format TELEM_PROVIDER=\"" << config.arg[entry] << "\": expected "; - logwrite( function, message.str() ); - return ERROR; - } - } - catch ( const std::exception &e ) { - message.str(""); message << "ERROR parsing TELEM_PROVIDER from " << config.arg[entry] << ": " << e.what(); - logwrite( function, message.str() ); - return ERROR; - } - message.str(""); message << "config:" << config.param[entry] << "=" << config.arg[entry]; - this->interface.async.enqueue_and_log( to_uppercase(DAEMON_NAME), function, message.str() ); - } } // end loop through the entries in the configuration file @@ -426,7 +402,14 @@ namespace Acam { auto newlogtime = next_occurrence( 12, 01, 00 ); std::this_thread::sleep_until( newlogtime ); close_log(); - init_log( logpath, DAEMON_NAME ); + // retry the re-open on a short timer so a transient failure (missing + // datedir, permission/owner drift, full disk) doesn't silence logging + // for ~24h until the next rotation + while ( init_log( logpath, DAEMON_NAME ) != 0 ) { + std::cerr << get_timestamp() << " (Acam::Server::new_log_day) " + << "ERROR: log rotation failed to open new logfile; retrying in 60s\n"; + std::this_thread::sleep_for( std::chrono::seconds(60) ); + } // ensure it doesn't immediately re-open std::this_thread::sleep_for( std::chrono::seconds(1) ); } @@ -542,7 +525,7 @@ namespace Acam { * Valid commands are listed in acamd_commands.h * */ - void Server::doit( Network::TcpSocket sock ) { + void Server::doit( Network::TcpSocket &sock ) { std::string function = "Acam::Server::doit"; long ret; std::stringstream message; diff --git a/acamd/acam_server.h b/acamd/acam_server.h index d3210b5e..87f70dac 100644 --- a/acamd/acam_server.h +++ b/acamd/acam_server.h @@ -112,7 +112,7 @@ namespace Acam { static void thread_main( Acam::Server &server, std::shared_ptr sock ); static void async_main( Acam::Server &acam, Network::UdpSocket sock ); ///< asynchronous message sending thread - void doit(Network::TcpSocket sock); ///< the workhorse of each thread connetion + void doit(Network::TcpSocket &sock); ///< the workhorse of each thread connetion void exit_cleanly(); ///< exit long configure_acamd(); ///< read and apply the configuration file long configure_telemetry(); ///< read and apply telem configuration diff --git a/acamd/acamd.cpp b/acamd/acamd.cpp index 0bc34dcb..3664b7b8 100644 --- a/acamd/acamd.cpp +++ b/acamd/acamd.cpp @@ -185,6 +185,19 @@ int main(int argc, char **argv) { // publish snapshot of my telemetry so the world knows I'm online acamd.interface.publish_snapshot(); + std::this_thread::sleep_for( std::chrono::milliseconds(250) ); + acamd.interface.request_snapshot(); + + // publish the andor CCD temperature on a fixed 60-second interval + // (temperature varies continuously, so it is not published on change) + // + std::thread( []( Acam::Interface &iface ) { + while ( true ) { + iface.publish_temperature(); + std::this_thread::sleep_for( std::chrono::seconds(60) ); + } + }, std::ref(acamd.interface) ).detach(); + // This will pre-thread N_THREADS threads. // There will be N_THREADS-1 non-blocking threads, then // loop forever on Accept to dynamically spawn a new thread for each diff --git a/calibd/calib_interface.cpp b/calibd/calib_interface.cpp index 216412ce..6a40a628 100644 --- a/calibd/calib_interface.cpp +++ b/calibd/calib_interface.cpp @@ -687,93 +687,100 @@ namespace Calib { /***** Calib::Interface::close **********************************************/ - /***** Calib::Interface::publish_snapshot ***********************************/ + /***** Calib::Interface::get_status *****************************************/ /** - * @brief publishes a snapshot of my telemetry - * @details This publishes a JSON message containing a snapshot of my - * telemetry. + * @brief read current calib state (modulators + actuators) into status * */ - void Interface::publish_snapshot() { - std::string dontcare; - this->publish_snapshot(dontcare); - } - void Interface::publish_snapshot( std::string &retstring ) { - const std::string function("Calib::Interface::publish_snapshot"); + void Interface::get_status() { std::string ret_isopen; - // assemble the telemetry into a json message - // - nlohmann::json jmessage_out; - - jmessage_out["source"] = "calibd"; // source of this telemetry + this->status.values.clear(); - // get the status for each modulator in the map - // assemble message string of "pow dut per" indexed by name + // modulator state: "pow dut per" indexed by modulator name // - this->is_open("lampmod",ret_isopen); + this->is_open( "lampmod", ret_isopen ); for ( const auto &[num,name] : this->modulator.modmap_num ) { - // initialize these on each pass double dut=NAN; double per=NAN; int pow=-1; std::stringstream retstream; - if (ret_isopen=="true") this->modulator.status( num, dut, per, pow ); + if ( ret_isopen=="true" ) this->modulator.status( num, dut, per, pow ); switch(pow) { case 0 : retstream << "off "; break; case 1 : retstream << "on " ; break; default: retstream << "err "; break; } - if ( std::isnan(dut) ) retstream << "nan "; else retstream << dut << " "; if ( std::isnan(per) ) retstream << "nan "; else retstream << per; - jmessage_out[name] = retstream.str(); + this->status.values[name] = retstream.str(); } - // get a copy of the motormap and - // loop through all motors, getting their actuator position + // actuator state: CAL indexed by uppercase motor name // - auto _motormap = this->motion.motorinterface.get_motormap(); // local copy of motormap - + auto _motormap = this->motion.motorinterface.get_motormap(); std::string retmotion; - this->is_open("motion",retmotion); - bool ismotion = (retmotion=="true" ? true : false); - + this->is_open( "motion", retmotion ); + bool ismotion = ( retmotion=="true" ); for ( const auto &mot : _motormap ) { - if (ismotion) this->motion.get( mot.first, retstring ); // get position of actuator - std::string key="CAL"+mot.first; // key = CALxxxx - make_uppercase(key); // make key uppercase - jmessage_out[ key ] = (ismotion?retstring:"not_connected"); // store in JSON message + std::string posret; + if ( ismotion ) this->motion.get( mot.first, posret ); + std::string key = "CAL" + mot.first; + make_uppercase( key ); + this->status.values[key] = ( ismotion ? posret : "not_connected" ); + } + } + /***** Calib::Interface::get_status *****************************************/ + + + /***** Calib::Interface::publish_status *************************************/ + /** + * @brief publish calib state, but only if it changed (or forced) + * @param[in] force optional (default=false) publish irrespective of change + * + */ + void Interface::publish_status( bool force ) { + // Serialize the publish-on-change critical section against concurrent callers + // (doit threads + subscriber thread). NOTE: held across get_status() hardware I/O + // for now — a contained per-daemon stall, not socket corruption. @TODO revisit: + // refactor get_status() to build a local and swap under a short lock. + std::lock_guard lock( this->publish_mutex ); + + // refresh current state from hardware + // + this->get_status(); + + // unless forced, only publish if the state changed + // + if ( !force && this->status == this->last_published_status ) return; + + nlohmann::json jmessage_out; + jmessage_out[Key::SOURCE] = Topic::CALIBD; + for ( const auto &[key,val] : this->status.values ) { + jmessage_out[key] = val; } - // for backwards compatibility - jmessage_out["messagetype"]="calibinfo"; - retstring = jmessage_out.dump(); // serialize the json message into retstring - retstring.append(JEOF); // append the JSON message terminator + this->last_published_status = this->status; try { this->publisher->publish( jmessage_out ); } catch( const std::exception &e ) { - logwrite( "Calib::Interface::publish_snapshot", + logwrite( "Calib::Interface::publish_status", "ERROR publishing message: "+std::string(e.what()) ); } } - /***** Calib::Interface::publish_snapshot ***********************************/ + /***** Calib::Interface::publish_status *************************************/ void Interface::handletopic_snapshot( const nlohmann::json &jmessage ) { - // If my name is in the jmessage then publish my snapshot + // If my topic is in the jmessage then publish my status // - if ( jmessage.contains( Calib::DAEMON_NAME ) ) { - this->publish_snapshot(); - } - else - if ( jmessage.contains( "test" ) ) { - logwrite( "Calib::Interface::handletopic_snapshot", jmessage.dump() ); + if ( jmessage.contains( Topic::CALIBD ) ) { + this->publish_status(); } } diff --git a/calibd/calib_interface.h b/calibd/calib_interface.h index 430b67be..cd8dc8a3 100644 --- a/calibd/calib_interface.h +++ b/calibd/calib_interface.h @@ -145,6 +145,21 @@ namespace Calib { private: zmqpp::context context; + /** + * @struct Status + * @brief published calib state: value strings indexed by key (modulators + actuators) + */ + struct Status { + std::map values; + bool operator==(const Status &o) const { return values == o.values; } + bool operator!=(const Status &o) const { return !(*this == o); } + }; + Status status; ///< current calib state + Status last_published_status; ///< last published calib state + std::mutex publish_mutex; ///< serializes publish-on-change; held over get_status() for now — @TODO revist + + void get_status(); ///< refresh status from hardware + public: Interface() : context(), @@ -153,7 +168,7 @@ namespace Calib { should_subscriber_thread_run(false) { topic_handlers = { - { "_snapshot", std::function( + { Topic::SNAPSHOT, std::function( [this](const nlohmann::json &msg) { handletopic_snapshot(msg); } ) } }; } @@ -188,8 +203,7 @@ namespace Calib { void handletopic_snapshot( const nlohmann::json &jmessage ); - void publish_snapshot(); - void publish_snapshot(std::string &retstring); + void publish_status( bool force=false ); ///< publish calib state on change (or force) long open(std::string args, std::string &retstring); long is_open(std::string args, std::string &retstring); diff --git a/calibd/calib_server.cpp b/calibd/calib_server.cpp index 73460ae7..664fc0de 100644 --- a/calibd/calib_server.cpp +++ b/calibd/calib_server.cpp @@ -302,7 +302,14 @@ namespace Calib { auto newlogtime = next_occurrence( 12, 01, 00 ); std::this_thread::sleep_until( newlogtime ); close_log(); - init_log( logpath, DAEMON_NAME ); + // retry the re-open on a short timer so a transient failure (missing + // datedir, permission/owner drift, full disk) doesn't silence logging + // for ~24h until the next rotation + while ( init_log( logpath, DAEMON_NAME ) != 0 ) { + std::cerr << get_timestamp() << " (Calib::Server::new_log_day) " + << "ERROR: log rotation failed to open new logfile; retrying in 60s\n"; + std::this_thread::sleep_for( std::chrono::seconds(60) ); + } // ensure it doesn't immediately re-open std::this_thread::sleep_for( std::chrono::seconds(1) ); } @@ -420,7 +427,7 @@ namespace Calib { * Valid commands are listed in acamd_commands.h * */ - void Server::doit(Network::TcpSocket sock) { + void Server::doit(Network::TcpSocket &sock) { std::string function = "Calib::Server::doit"; long ret; std::stringstream message; @@ -629,22 +636,6 @@ namespace Calib { if ( cmd == CALIBD_LAMPMOD ) { ret = this->interface.modulator.control( args, retstring ); } - else - - // telemetry request - // - if ( cmd == SNAPSHOT || cmd == TELEMREQUEST ) { - if ( args=="?" || args=="help" ) { - retstring=TELEMREQUEST+"\n"; - retstring.append( " Returns a serialized JSON message containing telemetry\n" ); - retstring.append( " information, terminated with \"EOF\\n\".\n" ); - ret=HELP; - } - else { - this->interface.publish_snapshot( retstring ); - ret = JSON; - } - } // unknown commands generate an error // @@ -692,7 +683,7 @@ namespace Calib { if ( sock.Write( retstring ) < 0 ) connection_open=false; } - if ( ret==NO_ERROR ) this->interface.publish_snapshot(); + if ( ret==NO_ERROR ) this->interface.publish_status(); if (!sock.isblocking()) break; // Non-blocking connection exits immediately. // Keep blocking connection open for interactive session. diff --git a/calibd/calib_server.h b/calibd/calib_server.h index 9c9d9cb2..9e994abc 100644 --- a/calibd/calib_server.h +++ b/calibd/calib_server.h @@ -96,7 +96,7 @@ namespace Calib { void exit_cleanly(void); ///< exit long configure_calibd(); ///< read and apply the configuration file - void doit(Network::TcpSocket sock); ///< the workhorse of each thread connetion + void doit(Network::TcpSocket &sock); ///< the workhorse of each thread connetion void handle_signal( int signo ); diff --git a/calibd/calibd.cpp b/calibd/calibd.cpp index 2c9da1cd..70663f55 100644 --- a/calibd/calibd.cpp +++ b/calibd/calibd.cpp @@ -126,8 +126,8 @@ int main(int argc, char **argv) { } std::this_thread::sleep_for( std::chrono::milliseconds(500) ); - // publish snapshot of my telemetry so the world knows I'm online - calibd.interface.publish_snapshot(); + // read current state and force-publish so the world knows I'm online + calibd.interface.publish_status( true ); // This will pre-thread N_THREADS threads. // The 0th thread is reserved for the blocking port, and the rest are for the non-blocking port. diff --git a/camerad/astrocam.cpp b/camerad/astrocam.cpp index cb6c547e..de6e6eb7 100644 --- a/camerad/astrocam.cpp +++ b/camerad/astrocam.cpp @@ -25,7 +25,7 @@ namespace AstroCam { * */ void Interface::publish_status(bool force) { - std::lock_guard lock(this->publish_mutex); + std::lock_guard lock(this->publish_mutex); // REQUIRED: serializes publish-on-change + non-thread-safe socket // unless forced, publish only if there was a change if (!force && this->status==this->last_published_status) return; @@ -67,6 +67,68 @@ namespace AstroCam { /***** AstroCam::Interface::handletopic_snapshot ****************************/ + // Each subscriber handler caches the latest full JSON snapshot from its + // provider, keyed by topic. The JSON->FITS-keyword conversion is deferred + // to exposure lock-in (see do_expose / add_cached_telem). + // + void Interface::handletopic_calib( const nlohmann::json &jmessage ) { + std::unique_lock lock(live_telemetry_mtx); + this->live_telemetry[Topic::CALIBD] = jmessage; + } + void Interface::handletopic_flexure( const nlohmann::json &jmessage ) { + std::unique_lock lock(live_telemetry_mtx); + this->live_telemetry[Topic::FLEXURED] = jmessage; + } + void Interface::handletopic_focus( const nlohmann::json &jmessage ) { + std::unique_lock lock(live_telemetry_mtx); + this->live_telemetry[Topic::FOCUSD] = jmessage; + } + void Interface::handletopic_power( const nlohmann::json &jmessage ) { + std::unique_lock lock(live_telemetry_mtx); + this->live_telemetry[Topic::POWERD] = jmessage; + } + void Interface::handletopic_slit( const nlohmann::json &jmessage ) { + std::unique_lock lock(live_telemetry_mtx); + this->live_telemetry[Topic::SLITD] = jmessage; + } + void Interface::handletopic_targetinfo( const nlohmann::json &jmessage ) { + std::unique_lock lock(live_telemetry_mtx); + this->live_telemetry[Topic::TARGETINFO] = jmessage; + } + void Interface::handletopic_tcs( const nlohmann::json &jmessage ) { + std::unique_lock lock(live_telemetry_mtx); + this->live_telemetry[Topic::TCSD] = jmessage; + } + void Interface::handletopic_thermal( const nlohmann::json &jmessage ) { + std::unique_lock lock(live_telemetry_mtx); + this->live_telemetry[Topic::THERMALD] = jmessage; + } + + + /***** AstroCam::Interface::get_live_airmass ********************************/ + /** + * @brief return the latest airmass from cached tcsd pub-sub telemetry + * @details AIRMASS is averaged over the exposure in dothread_shutter and + * written to systemkeys, so it is intentionally not part of the + * FITS telemkeys. Returns NAN when no valid (on-sky) value is + * available. + * @return airmass as double, or NAN if unavailable + * + */ + double Interface::get_live_airmass() { + std::unique_lock lock(live_telemetry_mtx); + auto it = this->live_telemetry.find( Topic::TCSD ); + if ( it != this->live_telemetry.end() ) { + const auto &jmsg = it->second; + if ( jmsg.contains( Key::Tcsd::AIRMASS ) && jmsg.at( Key::Tcsd::AIRMASS ).is_number() ) { + return jmsg.at( Key::Tcsd::AIRMASS ).get(); + } + } + return NAN; + } + /***** AstroCam::Interface::get_live_airmass ********************************/ + + long NewAstroCam::new_expose( std::string nseq_in ) { logwrite( "NewAstroCam::new_expose", nseq_in ); return( NO_ERROR ); @@ -1948,9 +2010,9 @@ namespace AstroCam { interface.do_native( "SPC" ); } - // get the airmass now + // get the latest airmass collected from tcsd telemetry now // - interface.collect_telemetry_key( "tcsd", "AIRMASS", airmass0 ); + airmass0 = interface.get_live_airmass(); // If configured, send a command to the ARC controller to open // the shutter. This is not connected to the shutter but can be @@ -2010,9 +2072,9 @@ namespace AstroCam { interface.broadcast.notice( function, "external shutter closed at "+timestring ); } - // get the airmass again + // get the latest airmass again // - interface.collect_telemetry_key( "tcsd", "AIRMASS", airmass1 ); + airmass1 = interface.get_live_airmass(); // average airmass // @@ -2633,6 +2695,34 @@ namespace AstroCam { /***** AstroCam::Interface::dothread_monitor_exposure_pending ***************/ + /***** AstroCam::add_cached_telem ******************************************/ + /** + * @brief add one provider's cached JSON telemetry into a FITS Header + * @details Primary tables route to the primary header; Extension tables + * carry a channel and route to the extension (elmo) map. This is + * the same keyinfo routing the subscriber handlers used to do, + * now applied once at lock-in from the cached JSON snapshot. + * @param[in,out] telem Header to populate + * @param[in] jmsg cached JSON snapshot for one provider + * @param[in] keys keyinfo table (Primary[] or Extension[]) + * + */ + template + static void add_cached_telem( Common::Header &telem, + const nlohmann::json &jmsg, + const KeyT (&keys)[N] ) { + for ( const auto &k : keys ) { + if constexpr ( std::is_same_v ) { + telem.add_json_key( jmsg, k.jkey, k.keyword, k.comment, k.type, EXT, k.chan ); + } + else { + telem.add_json_key( jmsg, k.jkey, k.keyword, k.comment, k.type, PRI ); + } + } + } + /***** AstroCam::add_cached_telem ******************************************/ + + /***** AstroCam::Interface::do_expose ***************************************/ /** * @brief initiate an exposure @@ -2705,9 +2795,32 @@ namespace AstroCam { logwrite( function, message.str() ); #endif - // Collect telemetry, which will be stored in camera_info.telemkeys + // telemetry is locked-in here -- + // build the FITS telemetry header from the latest JSON snapshots cached by + // the subscriber handlers. Built fresh so a provider that has gone silent + // does not leave stale keys behind. // - this->collect_telemetry(); + { + std::unique_lock lock(live_telemetry_mtx); + + Common::Header telem; + + auto add = [&]( const std::string &topic, const auto &keytable ) { + auto it = this->live_telemetry.find( topic ); + if ( it != this->live_telemetry.end() ) add_cached_telem( telem, it->second, keytable ); + }; + + add( Topic::CALIBD, FitsHeaderKeys::CalibInfoKeys ); // primary + add( Topic::POWERD, FitsHeaderKeys::PowerInfoKeys ); // primary + add( Topic::SLITD, FitsHeaderKeys::SlitInfoKeys ); // primary + add( Topic::TARGETINFO, FitsHeaderKeys::TargetInfoKeys ); // primary + add( Topic::TCSD, FitsHeaderKeys::TcsInfoKeys ); // primary + add( Topic::FLEXURED, FitsHeaderKeys::FlexureInfoKeys ); // extension + add( Topic::FOCUSD, FitsHeaderKeys::FocusInfoKeys ); // extension + add( Topic::THERMALD, FitsHeaderKeys::ThermalInfoKeys ); // extension + + this->camera_info.telemkeys = telem; + } // Make a copy of this->camera_info for this particular exposure buffer number. // This expinfo will be used for this particular exposure. @@ -2810,11 +2923,9 @@ namespace AstroCam { timespec timenow = Time::getTimeNow(); // get the time NOW std::string timestring = timestamp_from( timenow ); // format that time as YYYY-MM-DDTHH:MM:SS.sss double mjd = mjd_from( timenow ); // modified Julian date of start - double airmass=NAN; - - // get the airmass from tcsd telemetry now + // get the latest airmass collected from tcsd telemetry now // - this->collect_telemetry_key( "tcsd", "AIRMASS", airmass ); + double airmass = this->get_live_airmass(); this->fitsinfo[this_expbuf]->systemkeys.primary().addkey( "EXPSTART", timestring, "exposure start time" ); this->fitsinfo[this_expbuf]->systemkeys.primary().addkey( "MJD0", mjd, "exposure start time (modified Julian Date)" ); @@ -3067,358 +3178,8 @@ namespace AstroCam { /***** AstroCam::Interface::do_expose ***************************************/ - /***** AstroCam::Interface::make_telemetry_message **************************/ - /** - * @brief assembles my telemetry message - * @details This creates a JSON message for my telemetry info, then serializes - * it into a std::string ready to be sent over a socket. - * @param[out] retstring string containing the serialization of the JSON message - * - */ - void Interface::make_telemetry_message( std::string &retstring ) { - // assemble the telemetry I want to report into a json message - // Set a messagetype keyword to indicate what kind of message this is. - // - nlohmann::json jmessage; - jmessage["messagetype"] = "camerainfo"; - - jmessage["SHUTTIME_SEC"] = this->camera.shutter.get_duration(); // shutter open time in sec - - retstring = jmessage.dump(); // serialize the json message into a string - - retstring.append(JEOF); // append JSON message terminator - - return; - } - /***** AstroCam::Interface::make_telemetry_message **************************/ - - - /***** AstroCam::Interface::collect_telemetry *******************************/ - /** - * @brief send the TELEMREQUEST command to each configured daemon to get telemetry - * @details This overloaded version accepts a name, for the case where - * telemetry is needed from one provider only (e.g. TCS) - * @param[in] name name of provider from TELEM_PROVIDER config key - * @param[out] retstring serialized string of json telemetry message - * - */ - void Interface::collect_telemetry(const std::string name, std::string &retstring) { - Common::DaemonClient jclient("", "\n", JEOF ); - auto it = this->telemetry_providers.find(name); - if ( it != this->telemetry_providers.end() ) { - jclient.set_name(it->first); - jclient.set_port(it->second); - jclient.connect(); - jclient.command(TELEMREQUEST, retstring); - jclient.disconnect(); - } - return; - } - /***** AstroCam::Interface::collect_telemetry *******************************/ - /** - * @brief send the TELEMREQUEST command to each configured daemon to get telemetry - * - */ - void Interface::collect_telemetry() { - std::string retstring; - - // Instantiate a client to communicate with each daemon, - // constructed with no name, newline termination on command writes, - // and JEOF termination on reply reads. - // - Common::DaemonClient jclient("", "\n", JEOF ); - - // Loop through each configured telemetry provider, which is a map of - // ports indexed by daemon name, both of which are used to update - // the jclient object. - // - // Send the command TELEMREQUEST to each daemon and read back the reply into - // retstring, which will be the serialized JSON telemetry message. - // - // handle_json_message() will parse the reply and set the FITS header - // keys in the telemkeys database. - // - for ( const auto &[name, port] : this->telemetry_providers ) { - jclient.set_name(name); - jclient.set_port(port); - jclient.connect(); - jclient.command(TELEMREQUEST, retstring); - jclient.disconnect(); - handle_json_message(retstring); - } - - return; - } - /***** AstroCam::Interface::collect_telemetry *******************************/ - - - /***** AstroCam::Interface::handle_json_message *****************************/ - /** - * @brief parses incoming telemetry messages - * @param[in] message_in serialized JSON message string - * @return ERROR | NO_ERROR - * - */ - long Interface::handle_json_message( std::string message_in ) { - const std::string function="AstroCam::Interface::handle_json_message"; - std::stringstream message; - std::string messagetype; - long error; - - // nothing to do if the message is empty - // - if ( message_in.empty() ) { - logwrite( function, "empty JSON message" ); - return ERROR; - } - - /** - * @struct PrimaryInfo - * @brief holds info for extracting primary header keys from json message - * @details The value in jmessage with key jkey will be added to the primary - * FITS header, using comment and optional keyword. If keyword is - * not specified then the header keyword uses jkey. - */ - struct PrimaryInfo { - std::string jkey; // key to extract from jmessage - std::string keyword; // optional FITS keyword (uses jkey if not specified) - std::string comment; // FITS key comment - std::string type=""; // optional keyword datatype - }; - - /** - * @struct ExtensionInfo - * @brief holds info for extracting extension header keys from json message - * @details The value in jmessage with key jkey will be added to the FITS - * header specified by channel chan, using comment and optional keyword. - * If keyword is not specified then the header keyword uses jkey. - */ - struct ExtensionInfo { - std::string chan; // chan name identifies which extension - std::string jkey; // key to extract from jmessage - std::string keyword; // optional FITS keyword (uses jkey if not specified) - std::string comment; // FITS key comment - std::string type=""; // optional keyword datatype - }; - - auto &telemkeys = this->camera_info.telemkeys; - - // use to select whether to write to extension or primary - // - bool ext = true; - bool pri = !ext; - - size_t eof_pos = message_in.find(JEOF); - if ( eof_pos != std::string::npos ) message_in.erase(eof_pos); - - try { - nlohmann::json jmessage = nlohmann::json::parse( message_in ); - - // jmessage must not contain key "error" and must contain key "messagetype" - // - if ( !jmessage.contains("error") ) { - if ( jmessage.contains("messagetype") ) { - messagetype = jmessage["messagetype"]; - error = NO_ERROR; - } - else { - logwrite( function, "ERROR received JSON message with no messagetype" ); - error = ERROR; - } - } - else { - logwrite( function, "ERROR in JSON message" ); - error = ERROR; - } - - // If jmessage contained error or no messagetype then get out now. - // - if ( error != NO_ERROR ) return error; - - // telemetry from calibd goes in the primary header - // - if ( messagetype == "calibinfo" ) { - const PrimaryInfo keyarray[] = { - {"MODFEAR", "", "FeAr lamp modulator pow dut per"}, - {"MODTHAR", "", "ThAr lamp modulator pow dut per"}, - {"MODBLCON", "", "Blue continuum modulator pow dut per"}, - {"MODBLBYP", "", "Blue bypass modulator pow dut per"}, - {"MODRDCON", "", "Red continuum modulator pow dut per"}, - {"MODRDBYP", "", "Red bypass modulator pow dut per"}, - {"CALCOVER", "", "calib cover state"}, - {"CALDOOR", "", "calib door state"} - }; - for ( const auto &keyinfo : keyarray ) { - telemkeys.add_json_key(jmessage, keyinfo.jkey, keyinfo.jkey, keyinfo.comment, keyinfo.type, pri); - } - } - else - // telemetry from flexured goes in the extension header corresponding to the channel - // - if ( messagetype == "flexureinfo" ) { - const ExtensionInfo keyarray[] = { - {"I", "FLXSPE_I", "FLXSPE", "I flexure spectral axis 2 (X) in um"}, - {"I", "FLXSPA_I", "FLXSPA", "I flexure spatial axis 3 (Y) in um"}, - {"I", "FLXPIS_I", "FLXPIS", "I flexure piston axis 1 (Z) in um"}, - {"R", "FLXSPE_R", "FLXSPE", "R flexure spectral axis 2 (X) in um"}, - {"R", "FLXSPA_R", "FLXSPA", "R flexure spatial axis 3 (Y) in um"}, - {"R", "FLXPIS_R", "FLXPIS", "R flexure piston axis 1 (Z) in um"}, - {"G", "FLXSPE_G", "FLXSPE", "G flexure spectral axis 2 (X) in um"}, - {"G", "FLXSPA_G", "FLXSPA", "G flexure spatial axis 3 (Y) in um"}, - {"G", "FLXPIS_G", "FLXPIS", "G flexure piston axis 1 (Z) in um"}, - {"U", "FLXSPE_U", "FLXSPE", "U flexure spectral axis 2 (X) in um"}, - {"U", "FLXSPA_U", "FLXSPA", "U flexure spatial axis 3 (Y) in um"}, - {"U", "FLXPIS_U", "FLXPIS", "U flexure piston axis 1 (Z) in um"} - }; - for ( const auto &keyinfo : keyarray ) { - telemkeys.add_json_key(jmessage, keyinfo.jkey, keyinfo.keyword, keyinfo.comment, keyinfo.type, ext, keyinfo.chan); - } - } - else - // telemetry from focusd goes in the extension header corresponding to the channel - // - if ( messagetype == "focusinfo" ) { - const ExtensionInfo keyarray[] = { - {"I", "FOCUSI", "FOCUS", "science camera I focus position in mm" }, - {"R", "FOCUSR", "FOCUS", "science camera R focus position in mm" }, - {"G", "FOCUSG", "FOCUS", "science camera G focus position in mm" }, - {"U", "FOCUSU", "FOCUS", "science camera U focus position in mm" } - }; - for ( const auto &keyinfo : keyarray ) { - telemkeys.add_json_key(jmessage, keyinfo.jkey, keyinfo.keyword, keyinfo.comment, keyinfo.type, ext, keyinfo.chan); - } - } - else - - // telemetry from powerd goes in the primary header - // - if ( messagetype == "powerinfo" ) { - const PrimaryInfo keyarray[] = { - {"LAMPTHAR", "", "is ThAr lamp on"}, - {"LAMPFEAR", "", "is FeAr lamp on"}, - {"LAMPBLUC", "", "is blue Xe continuum lamp on"}, - {"LAMPREDC", "", "is red continuum lamp on"}, - {"LAMPXE", "", "is Xe lamp on"}, - {"LAMPINCA", "", "is Incandescent lamp on"} - }; - for ( const auto &keyinfo : keyarray ) { - telemkeys.add_json_key(jmessage, keyinfo.jkey, keyinfo.jkey, keyinfo.comment, keyinfo.type, pri); - } - } - else - - // telemetry from calibd goes in the primary header - // - if ( messagetype == "slitinfo" ) { - const PrimaryInfo keyarray[] = { - {"SLITW", "", "slit width in arcsec"}, - {"SLITO", "", "slit offset in arcsec"}, - {"SLITPOSA", "", "slit actuator A position in mm"}, - {"SLITPOSA", "", "slit actuator A position in mm"}, - {"SLITPOSB", "", "slit actuator B position in mm"} - }; - for ( const auto &keyinfo : keyarray ) { - telemkeys.add_json_key(jmessage, keyinfo.jkey, keyinfo.jkey, keyinfo.comment, keyinfo.type, pri); - } - } - else - - // targetinfo telemetry comes from sequencerd and goes in the primary header - // - if ( messagetype == "targetinfo" ) { - const PrimaryInfo keyarray[] = { - {"OBS_ID", "", "Observation ID", "INT"}, - {"NAME", "", "target name", "STRING"}, -// {"BINSPECT", "", "binning in spectral direction"}, -// {"BINSPAT", "", "binning in spatial direction"}, - {"SLITA", "", "slit angle in deg", "FLOAT"}, - {"POINTMDE", "", "pointing mode", "STRING"}, - {"RA", "", "requested Right Ascension in J2000", "STRING"}, - {"DECL", "", "requested Declination in J2000", "STRING"} - }; - for ( const auto &keyinfo : keyarray ) { - message.str(""); message << "[DEBUG] targetinfo key " << keyinfo.jkey << "=" << jmessage[keyinfo.jkey]; - logwrite(function,message.str()); - telemkeys.add_json_key(jmessage, keyinfo.jkey, keyinfo.jkey, keyinfo.comment, keyinfo.type, pri); - } - } - else - - // telemetry from tcsd goes into primary header - // AIRMASS is intentionally left out since it is handled differently - // - if ( messagetype == "tcsinfo" ) { - const PrimaryInfo keyarray[] = { - {"CASANGLE", "", "TCS reported Cassegrain angle in deg", "FLOAT"}, - {"HA", "", "hour angle"}, - {"RAOFFSET", "", "offset Right Ascension"}, - {"DECLOFFS", "", "offset Declination"}, - {"TELRA", "", "TCS reported Right Ascension"}, - {"TELDEC", "", "TCS reported Declination"}, - {"AZ", "", "TCS reported azimuth"}, - {"ZENANGLE", "", "TCS reported Zenith angle", "FLOAT"}, - {"DOMEAZ", "", "TCS reported dome azimuth", "FLOAT"}, - {"DOMESHUT", "", "dome shutters"}, - {"TELFOCUS", "", "TCS reported telescope focus position in mm", "FLOAT"} - }; - for ( const auto &keyinfo : keyarray ) { - telemkeys.add_json_key(jmessage, keyinfo.jkey, keyinfo.jkey, keyinfo.comment, keyinfo.type, pri); - } - } - else - - // telemetry from thermald - // - if ( messagetype == "thermalinfo" ) { - const ExtensionInfo keyarray[] = { - {"I", "TCCD_I", "CCDTEMP", "I CCD temperature in Kelvin", "FLOAT"}, - {"R", "TCCD_R", "CCDTEMP", "R CCD temperature in Kelvin", "FLOAT"}, - {"G", "TCCD_G", "CCDTEMP", "G CCD temperature in Kelvin", "FLOAT"}, - {"U", "TCCD_U", "CCDTEMP", "U CCD temperature in Kelvin", "FLOAT"}, - - {"I", "TCOLL_I", "COLTEMP", "I collimator temp in deg C", "FLOAT"}, - {"R", "TCOLL_R", "COLTEMP", "R collimator temp in deg C", "FLOAT"}, - {"G", "TCOLL_G", "COLTEMP", "G collimator temp in deg C", "FLOAT"}, - - {"I", "TFOCUS_I", "FOCTEMP", "I focus temp in deg C", "FLOAT"}, - {"R", "TFOCUS_R", "FOCTEMP", "R focus temp in deg C", "FLOAT"}, - {"G", "TFOCUS_G", "FOCTEMP", "G focus temp in deg C", "FLOAT"}, - {"U", "TFOCUS_U", "FOCTEMP", "U focus temp in deg C", "FLOAT"} - }; - for ( const auto &keyinfo : keyarray ) { - telemkeys.add_json_key(jmessage, keyinfo.jkey, keyinfo.keyword, keyinfo.comment, keyinfo.type, ext, keyinfo.chan); - } - } - else - - // test message - // - if ( messagetype == "test" ) { - message.str(""); message << "received JSON test message: \"" << jmessage["test"].get() << "\""; - logwrite( function, message.str() ); - } - else { - message.str(""); message << "ERROR received unhandled JSON message type \"" << messagetype << "\""; - logwrite( function, message.str() ); - error = ERROR; - } - } - catch ( const nlohmann::json::parse_error &e ) { - message.str(""); message << "ERROR json exception parsing message: " << e.what(); - logwrite( function, message.str() ); - error = ERROR; - } - catch ( const std::exception &e ) { - message.str(""); message << "ERROR parsing message: " << e.what(); - logwrite( function, message.str() ); - error = ERROR; - } - - return error; - } - /***** AstroCam::Interface::handle_json_message *****************************/ /***** AstroCam::Interface::do_load_firmware ********************************/ @@ -6683,83 +6444,6 @@ logwrite(function, message.str()); } else // ---------------------------------------------------- - // telem - // ---------------------------------------------------- - // test sending the telem command - // - if ( testname == "telem" ) { - if ( tokens.size() < 2 ) { - logwrite( function, "ERROR expected an argument" ); - retstring="invalid_argument"; - return ERROR; - } - - if ( tokens[1] == "?" || tokens[1] == "help" ) { - retstring = CAMERAD_TEST; - retstring.append( " telem collect | test | calibd | flexured | focusd | tcsd\n" ); - retstring.append( " collect collects telemetry from all daemons\n" ); - retstring.append( " test sends a test JSON message back to myself (camerad)\n" ); - retstring.append( " all other args collect telemetry from named daemon only\n" ); - return HELP; - } - - if ( tokens[1] == "collect" ) { - this->collect_telemetry(); - return NO_ERROR; - } - - Common::DaemonClient jclient("", "\n", JEOF ); - - if ( tokens[1]=="calibd" ) { - jclient.set_name("calibd"); - jclient.set_port(9101); - jclient.connect(); - jclient.command(TELEMREQUEST, retstring); - jclient.disconnect(); - } - else - if ( tokens[1]=="flexured" ) { - jclient.set_name("flexured"); - jclient.set_port(9103); - jclient.connect(); - jclient.command(TELEMREQUEST, retstring); - jclient.disconnect(); - } - else - if ( tokens[1]=="focusd" ) { - jclient.set_name("focusd"); - jclient.set_port(9104); - jclient.connect(); - jclient.command(TELEMREQUEST, retstring); - jclient.disconnect(); - } - else - if ( tokens[1]=="tcsd" ) { - jclient.set_name("tcsd"); - jclient.set_port(9107); - jclient.connect(); - jclient.command(TELEMREQUEST, retstring); - jclient.disconnect(); - } - else - if ( tokens[1]=="test" ) { - nlohmann::json jmessage; - jmessage["messagetype"] = "test"; - jmessage["test"] = "Hello, world!"; - logwrite( function, "returning JSON test message" ); - retstring = jmessage.dump(); - } - else { - jclient.set_name("camerd"); - jclient.set_port(server.nbport); - jclient.connect(); - jclient.command("test json test", retstring); - jclient.disconnect(); - } - this->handle_json_message( retstring ); - } - else - // ---------------------------------------------------- // isready // ---------------------------------------------------- // am I ready for an exposure? diff --git a/camerad/astrocam.h b/camerad/astrocam.h index 116072ba..10048284 100644 --- a/camerad/astrocam.h +++ b/camerad/astrocam.h @@ -23,6 +23,7 @@ #include #include +#include "fits_header_defs.h" #include "utilities.h" #include "common.h" #include "camera.h" @@ -637,7 +638,23 @@ namespace AstroCam { useframes(true) { topic_handlers = { { Topic::SNAPSHOT, std::function( - [this](const nlohmann::json &msg) { handletopic_snapshot(msg); } ) } + [this](const nlohmann::json &msg) { handletopic_snapshot(msg); } ) }, + { Topic::CALIBD, std::function( + [this](const nlohmann::json &msg) { handletopic_calib(msg); } ) }, + { Topic::FLEXURED, std::function( + [this](const nlohmann::json &msg) { handletopic_flexure(msg); } ) }, + { Topic::FOCUSD, std::function( + [this](const nlohmann::json &msg) { handletopic_focus(msg); } ) }, + { Topic::POWERD, std::function( + [this](const nlohmann::json &msg) { handletopic_power(msg); } ) }, + { Topic::SLITD, std::function( + [this](const nlohmann::json &msg) { handletopic_slit(msg); } ) }, + { Topic::TARGETINFO, std::function( + [this](const nlohmann::json &msg) { handletopic_targetinfo(msg); } ) }, + { Topic::TCSD, std::function( + [this](const nlohmann::json &msg) { handletopic_tcs(msg); } ) }, + { Topic::THERMALD, std::function( + [this](const nlohmann::json &msg) { handletopic_thermal(msg); } ) } }; this->pFits.resize( NUM_EXPBUF ); // pre-allocate FITS_file object pointers for each exposure buffer @@ -670,6 +687,9 @@ namespace AstroCam { Camera::Camera camera; /// instantiate a Camera object Camera::Information camera_info; /// this is the main camera_info object + std::map live_telemetry; ///< latest JSON snapshot per provider, keyed by Topic + std::mutex live_telemetry_mtx; + std::unique_ptr publisher; ///< publisher object std::string publisher_address; ///< publish socket endpoint std::string publisher_topic; ///< my default topic for publishing @@ -693,6 +713,14 @@ namespace AstroCam { void publish_status(bool force=false); void request_snapshot(); void handletopic_snapshot(const nlohmann::json &jmessage_in); + void handletopic_calib(const nlohmann::json &jmessage_in); + void handletopic_flexure(const nlohmann::json &jmessage_in); + void handletopic_focus(const nlohmann::json &jmessage_in); + void handletopic_power(const nlohmann::json &jmessage_in); + void handletopic_slit(const nlohmann::json &jmessage_in); + void handletopic_targetinfo(const nlohmann::json &jmessage_in); + void handletopic_tcs(const nlohmann::json &jmessage_in); + void handletopic_thermal(const nlohmann::json &jmessage_in); Common::Broadcaster broadcast { this->publisher, Daemon::CAMERAD }; @@ -1139,8 +1167,6 @@ std::vector> fitsinfo; std::map< std::string, readout_info_t > readout_source; //!< STL map of readout sources indexed by readout name - std::map telemetry_providers; //!< a map of port[daemon_name] for telemetry providers - // Functions // void get_logical(Controller* pcontroller, @@ -1150,7 +1176,6 @@ std::vector> fitsinfo; Controller* get_active_controller(const int dev); void exposure_progress(); void make_image_keywords( int dev ); - long handle_json_message( std::string message_in ); long parse_spec_info( std::string args ); long parse_det_geometry( std::string args ); long parse_controller_config( std::string args ); @@ -1210,9 +1235,7 @@ std::vector> fitsinfo; long expose(std::string nexp_in); long do_expose(int nexp_in); - void make_telemetry_message( std::string &retstring ); - void collect_telemetry(); - void collect_telemetry(std::string name, std::string &retstring); + double get_live_airmass(); ///< latest airmass from cached tcsd telemetry, or NAN long native(std::string cmdstr); long native(std::string cmdstr, std::string &retstring); @@ -1254,77 +1277,6 @@ std::vector> fitsinfo; // int get_image_rows() { return this->rows; }; // REMOVE // int get_image_cols() { return this->cols; }; // REMOVE - using json = nlohmann::json; - template - void collect_telemetry_key( const std::string &name, const std::string &key, T &value ) { - const std::string function="AstroCam::Interface::collect_telemetry_key"; - std::stringstream message; - - std::string retstring; - - // collect the telemetry from this one named provider - // - collect_telemetry(name, retstring); - - // extract the correct typed value for the requested key from that - // telemetry message - // - try { - // get a JSON message from the serialized return string - // - nlohmann::json jmessage = nlohmann::json::parse( retstring ); - - // extract the value from the JSON message using jkey as the key - // - auto jvalue = jmessage.at( key ); - - if ( jvalue == nullptr ) return; - - if constexpr ( std::is_same::value ) { - if ( jvalue.type() == json::value_t::boolean ) { - value = jvalue.template get(); - } - } - else - if constexpr ( std::is_same::value ) { - if ( jvalue.type() == json::value_t::number_integer ) { - value = jvalue.template get(); - } - } - else - if constexpr ( std::is_same::value ) { - if ( jvalue.type() == json::value_t::number_unsigned ) { - value = jvalue.template get(); - } - } - else - if constexpr ( std::is_same::value || std::is_same::value ) { - if ( jvalue.type() == json::value_t::number_float ) { - value = jvalue.template get(); - } - } - else - if constexpr ( std::is_same::value ) { - if ( jvalue.type() == json::value_t::string ) { - value = jvalue.template get(); - } - } - else { - message << "ERROR unknown type for key " << key << " from provider " << name; - logwrite( function, message.str() ); - return; - } - } - catch( const json::exception &e ) { - message << "JSON exception parsing value for key " << key << " from provider " << name << ": " << e.what(); - logwrite( function, message.str() ); - } - catch( const std::exception &e ) { - message << "ERROR exception parsing value for key " << key << " from provider " << name << ": " << e.what(); - logwrite( function, message.str() ); - } - return; - } }; /***** AstroCam::Interface **************************************************/ diff --git a/camerad/camerad.cpp b/camerad/camerad.cpp index a026a5bc..f0730868 100644 --- a/camerad/camerad.cpp +++ b/camerad/camerad.cpp @@ -184,8 +184,8 @@ int main(int argc, char **argv) { Topic::FLEXURED, Topic::FOCUSD, Topic::POWERD, - Topic::TARGETINFO, Topic::SLITD, + Topic::TARGETINFO, Topic::TCSD, Topic::THERMALD } ) == ERROR ) { logwrite(function, "ERROR initializing publisher-subscriber handler"); @@ -262,7 +262,14 @@ void new_log_day() { auto newlogtime = next_occurrence( 12, 01, 00 ); std::this_thread::sleep_until( newlogtime ); close_log(); - init_log( logpath, Camera::DAEMON_NAME ); + // retry the re-open on a short timer so a transient failure (missing + // datedir, permission/owner drift, full disk) doesn't silence logging + // for ~24h until the next rotation + while ( init_log( logpath, Camera::DAEMON_NAME ) != 0 ) { + std::cerr << get_timestamp() << " (Camera::new_log_day) " + << "ERROR: log rotation failed to open new logfile; retrying in 60s\n"; + std::this_thread::sleep_for( std::chrono::seconds(60) ); + } // ensure it doesn't immediately re-open std::this_thread::sleep_for( std::chrono::seconds(1) ); } @@ -388,8 +395,10 @@ void doit(Network::TcpSocket &sock) { bool connection_open=true; - message.str(""); message << "thread " << sock.id << " accepted connection on fd " << sock.getfd(); +#ifdef LOGLEVEL_DEBUG + message.str(""); message << "[DEBUG] thread " << sock.id << " accepted connection on fd " << sock.getfd(); logwrite( function, message.str() ); +#endif while (connection_open) { memset(buf, '\0', BUFSIZE); // init buffers @@ -412,18 +421,14 @@ void doit(Network::TcpSocket &sock) { // Data available, now read from connected socket... // std::string sbuf; + const int fd_before_read = sock.getfd(); if ( ( ret=sock.Read( sbuf, '\n' ) ) <= 0 ) { - if (ret<0) { // could be an actual read error - message.str(""); message << "Read error on fd " << sock.getfd() << ": " << strerror(errno); logwrite(function, message.str()); - } - if (ret==0) { - message.str(""); message << "timeout reading from fd " << sock.getfd(); - logwrite( function, message.str() ); + if (ret<0) { // real read error + message.str(""); message << "Read error on fd " << fd_before_read << ": " << strerror(errno); + logwrite(function, message.str()); } - break; // Breaking out of the while loop will close the connection. - // This probably means that the client has terminated abruptly, - // having sent FIN but not stuck around long enough - // to accept CLOSE and give the LAST_ACK. + // ret==0 is orderly peer shutdown (TCP FIN); not an error, lower layer logs at DEBUG + break; } // convert the input buffer into a string and remove any trailing linefeed @@ -518,21 +523,6 @@ void doit(Network::TcpSocket &sock) { sock.Write( " " ); ret = NO_ERROR; } - // send telemetry as json message - // - if ( cmd == TELEMREQUEST ) { - if ( args=="?" || args=="help" ) { - retstring=TELEMREQUEST+"\n"; - retstring.append( " Returns a serialized JSON message containing my telemetry\n" ); - retstring.append( " information, terminated with \"EOF\\n\".\n" ); - ret=HELP; - } - else { - server.make_telemetry_message( retstring ); - ret = JSON; - } - } - else if ( cmd == CAMERAD_OPEN ) { ret = server.connect_controller(args, retstring); } diff --git a/camerad/camerad.h b/camerad/camerad.h index 15618461..5d67c9fb 100644 --- a/camerad/camerad.h +++ b/camerad/camerad.h @@ -169,30 +169,6 @@ namespace Camera { applied++; } - // TELEM_PROVIDER : contains daemon name and port to contact for header telemetry info - // - if ( config.param[entry] == "TELEM_PROVIDER" ) { - std::vector tokens; - Tokenize( config.arg[entry], tokens, " " ); - try { - if ( tokens.size() == 2 ) { - this->telemetry_providers[tokens.at(0)] = std::stod(tokens.at(1)); - } - else { - message.str(""); message << "bad format \"" << config.arg[entry] << "\": expected "; - this->camera.log_error( function, message.str() ); - return ERROR; - } - } - catch ( const std::exception &e ) { - message.str(""); message << "parsing TELEM_PROVIDER from " << config.arg[entry] << ": " << e.what(); - this->camera.log_error( function, message.str() ); - return ERROR; - } - message.str(""); message << "config:" << config.param[entry] << "=" << config.arg[entry]; - this->camera.async.enqueue_and_log( "CAMERAD", function, message.str() ); - applied++; - } // ASYNCPORT if (config.param[entry].compare(0, 9, "ASYNCPORT")==0) { diff --git a/common/acamd_commands.h b/common/acamd_commands.h index ae7fd68b..bc68211a 100644 --- a/common/acamd_commands.h +++ b/common/acamd_commands.h @@ -30,7 +30,7 @@ const std::string ACAMD_FAN = "fan"; ///< set Andor fan mode const std::string ACAMD_FILTER = "filter"; ///< filter [ name ] to set or get the filter const std::string ACAMD_GUIDESET = "guideset"; ///< set params for guider display const std::string ACAMD_HOME = "home"; const int ACAMD_HOME_TIMEOUT = 180000; ///< home all motors - const int ACAMD_MOVE_TIMEOUT = 40000; ///< covers filter and cover moves + const int ACAMD_MOVE_TIMEOUT = 60000; ///< covers filter and cover moves (PI motor wait = 40s; +20s slack for reply round-trip) const std::string ACAMD_INIT = "init"; ///< *** const std::string ACAMD_ISACQUIRED = "isacquired"; ///< is the target acquired? const std::string ACAMD_ISHOME = "ishome"; ///< are all motors homed? @@ -100,7 +100,6 @@ const std::vector ACAMD_SYNTAX = { ACAMD_OFFSETGOAL+" [ ? | ]", ACAMD_OFFSETPERIOD+" [ ? | ]", ACAMD_PUTONSLIT+" [ ? | ]", - TELEMREQUEST+" [ ? ]", ACAMD_SHUTDOWN+" [ ? ]", ACAMD_TEST+" ? | ..." }; diff --git a/common/calibd_commands.h b/common/calibd_commands.h index 88ca0e03..35129ad3 100644 --- a/common/calibd_commands.h +++ b/common/calibd_commands.h @@ -31,7 +31,5 @@ const std::vector CALIBD_SYNTAX = { CALIBD_NATIVE+" ", CALIBD_SET+" [ =open|close ... ] | [?]", " LAMP MODULATOR CONTROL", - CALIBD_LAMPMOD+" ? | open | close | reconnect | default | [ [ on|off ] | [ ] ]", - " OTHER", - TELEMREQUEST+" [ ? ]" + CALIBD_LAMPMOD+" ? | open | close | reconnect | default | [ [ on|off ] | [ ] ]" }; diff --git a/common/common.cpp b/common/common.cpp index 149d225c..5d7c08f6 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -44,34 +44,6 @@ namespace Common { /***** Common::Broadcaster::emit ********************************************/ - /***** Common::collect_telemetry ********************************************/ - /** - * @brief send the TELEMREQUEST command to daemon to get telemetry - * @param[in] provider pair contains <"provider", port> - * @param[out] retstring serialized string of json telemetry message - * - */ - void collect_telemetry(const std::pair &provider, std::string &retstring) { - // Instantiate a client to communicate with each daemon, - // constructed with no name, newline termination on command writes, - // and JEOF termination on reply reads. - // - Common::DaemonClient jclient("", "\n", JEOF ); - - // Send the command TELEMREQUEST to each daemon and read back the reply into - // retstring, which will be the serialized JSON telemetry message. - // - jclient.set_name(provider.first); - jclient.set_port(provider.second); - jclient.connect(); - jclient.command(TELEMREQUEST, retstring); - jclient.disconnect(); - - return; - } - /***** Common::collect_telemetry ********************************************/ - - /***** Common::extract_correlation_id ***************************************/ /** * @brief detect and strip a correlation ID prefix from an inter-daemon message @@ -588,9 +560,11 @@ namespace Common { // Do not wait for a reply. // if ( reply == "NOREPLY" ) { - message.str(""); message << "not waiting for reply and closing connection to " << this->name << " socket " << _sock.gethost() +#ifdef LOGLEVEL_DEBUG + message.str(""); message << "[DEBUG] not waiting for reply and closing connection to " << this->name << " socket " << _sock.gethost() << "/" << _sock.getport() << " on fd " << _sock.getfd(); logwrite( function, message.str() ); +#endif _sock.Close(); return( error ); } @@ -632,9 +606,11 @@ namespace Common { // close the connection // - message.str(""); message << "closing connection to " << this->name << " socket " << _sock.gethost() +#ifdef LOGLEVEL_DEBUG + message.str(""); message << "[DEBUG] closing connection to " << this->name << " socket " << _sock.gethost() << "/" << _sock.getport() << " on fd " << _sock.getfd(); logwrite( function, message.str() ); +#endif _sock.Close(); // assign the response to the reply string, passed in by reference @@ -712,13 +688,21 @@ namespace Common { std::stringstream message; long ret; - std::unique_lock lock( this->client_access ); + std::unique_lock lock( this->client_access ); + // Auto-reconnect if the connection has dropped (peer hung up, idle + // timeout, etc.). The recursive mutex allows nested locking when + // this calls connect() under the same lock. Both command() callers + // and direct .send() callers benefit from this single-location fix. + // if ( ! this->socket.isconnected() ) { - message.str(""); message << "ERROR:cannot send \"" << strip_newline(command) << "\" to " << this->name - << " because daemon is not connected"; - logwrite( function, message.str() ); - return ERROR; + if ( this->connect() != NO_ERROR ) { + message.str(""); message << "ERROR:cannot send \"" << strip_newline(command) << "\" to " + << this->name << ": reconnect failed"; + logwrite( function, message.str() ); + std::this_thread::sleep_for( std::chrono::milliseconds(100) ); // rate-limit retry storms + return ERROR; + } } if ( this->socket.getfd() < 1 ) { @@ -758,6 +742,23 @@ namespace Common { command += this->term_write; } + // Drain any stale reply that was buffered from a prior send whose reply was + // never read (e.g. a DONTWAIT send, or a send that timed out before the reply + // arrived and reconnected without draining). Without this, the stale CID-tagged + // reply would be read as the response to the current command and cause a + // mismatch, propagating through every subsequent send until the socket is cycled. + // Safe: client_access mutex is held, so no concurrent sender touches this socket; + // and in the command/reply protocol daemons never push unsolicited TCP data. + // + { + std::string discard; + while ( this->socket.Poll(0) > 0 ) { + if ( this->socket.Read( discard, this->term_read ) <= 0 ) break; + message.str(""); message << "drained stale buffered reply from " << this->name << ": \"" << discard << "\""; + logwrite( function, message.str() ); + } + } + int trys=0; int retry_limit=3; int pollret=0; @@ -841,17 +842,11 @@ namespace Common { // that is no longer pertinent. // if ( this->timedout ) { - logwrite( function, "[TEST] attempting to flush after timeout" ); if ( ( pollret = this->socket.Poll(2000) ) > 0 ) { + ret = ( term_with_string_actual ? socket.Read( reply, term_str_read_actual ) + : socket.Read( reply, term_read ) ); reply.erase( std::remove(reply.begin(), reply.end(), '\r' ), reply.end() ); reply.erase( std::remove(reply.begin(), reply.end(), '\n' ), reply.end() ); - message.str(""); message << "[TEST] I read this: " << reply << " but I'm going to read again!"; - logwrite( function, message.str() ); - ret = ( term_with_string_actual ? socket.Read( reply, term_str_read_actual ) : socket.Read( reply, term_read ) ); - reply.erase( std::remove(reply.begin(), reply.end(), '\r' ), reply.end() ); - reply.erase( std::remove(reply.begin(), reply.end(), '\n' ), reply.end() ); - message.str(""); message << "[TEST] and the 2nd read was this: " << reply; - logwrite( function, message.str() ); } this->timedout=false; } @@ -1102,9 +1097,9 @@ namespace Common { // if ( ( error = this->connect() ) != NO_ERROR ) retstring="ERROR"; else retstring="DONE"; #ifdef LOGLEVEL_DEBUG -// message.str(""); message << "[DEBUG] connected to " << this->name << " socket " << this->socket.gethost() -// << "/" << this->socket.getport() << " on fd " << this->socket.getfd(); -// logwrite( function, message.str() ); + message.str(""); message << "[DEBUG] connected to " << this->name << " socket " << this->socket.gethost() + << "/" << this->socket.getport() << " on fd " << this->socket.getfd(); + logwrite( function, message.str() ); #endif } else @@ -1118,9 +1113,9 @@ namespace Common { // if ( args == "disconnect" ) { #ifdef LOGLEVEL_DEBUG -// message.str(""); message << "[DEBUG] disconnecting " << this->name << " socket " << this->socket.gethost() -// << "/" << this->socket.getport() << " from fd " << this->socket.getfd(); -// logwrite( function, message.str() ); + message.str(""); message << "[DEBUG] disconnecting " << this->name << " socket " << this->socket.gethost() + << "/" << this->socket.getport() << " from fd " << this->socket.getfd(); + logwrite( function, message.str() ); #endif // then close the connection // @@ -1131,27 +1126,16 @@ namespace Common { // all other commands go straight on through, as-is // else { - // but only if the connection is open of course + // send() handles auto-reconnect-if-needed and uses client_access + // internally for serialization. // - if ( !this->socket.isconnected() ) { - message.str(""); message << "ERROR: connection not open to " << this->name; - logwrite( function, message.str() ); - error = ERROR; - } - else { -#ifdef LOGLEVEL_DEBUG -// message.str(""); message << "[DEBUG] sending to " << this->name << " socket " << this->socket.gethost() -// << "/" << this->socket.getport() << " on fd " << this->socket.getfd() << ": " << args; -// logwrite( function, message.str() ); -#endif - error = this->send( args, retstring ); - } + error = this->send( args, retstring ); } #ifdef LOGLEVEL_DEBUG -// message.str(""); message << "[DEBUG] reply from " << this->name << " socket " << this->socket.gethost() -// << "/" << this->socket.getport() << " on fd " << this->socket.getfd() << ": " << retstring; -// logwrite( function, message.str() ); + message.str(""); message << "[DEBUG] reply from " << this->name << " socket " << this->socket.gethost() + << "/" << this->socket.getport() << " on fd " << this->socket.getfd() << ": " << retstring; + logwrite( function, message.str() ); #endif return( error ); @@ -1171,7 +1155,7 @@ namespace Common { std::stringstream message; long error = NO_ERROR; - const std::lock_guard lock( this->client_access ); + const std::lock_guard lock( this->client_access ); // probably a programming error if this Common::DaemonClient object is not configured // @@ -1221,13 +1205,15 @@ namespace Common { std::string function = "Common::DaemonClient::disconnect"; std::stringstream message; - const std::lock_guard lock( this->client_access ); + const std::lock_guard lock( this->client_access ); // close the connection // - message.str(""); message << "closing connection to " << this->name << " socket " << this->socket.gethost() +#ifdef LOGLEVEL_DEBUG + message.str(""); message << "[DEBUG] closing connection to " << this->name << " socket " << this->socket.gethost() << "/" << this->socket.getport() << " on fd " << this->socket.getfd(); logwrite( function, message.str() ); +#endif this->socket.Close(); return; diff --git a/common/common.h b/common/common.h index bee87cb4..4e9b2905 100644 --- a/common/common.h +++ b/common/common.h @@ -35,7 +35,6 @@ const long ABORT = 6; const long EXIT = 999; const std::string JEOF = "EOF\n"; ///< used to terminate JSON messages -const std::string TELEMREQUEST = "sendtelem"; ///< common daemon command used to request telemetry const std::string SNAPSHOT = "snapshot"; ///< common daemon command forces publish of telemetry const std::string CID_PREFIX = "#cid:"; ///< correlation ID marker for inter-daemon commands @@ -71,6 +70,7 @@ namespace Common { zmqpp::context &_context; zmqpp::socket _socket; zmqpp::poller _poller; ///< persistent poller — avoids per-call reconstruction + mutable std::mutex _publish_mtx; ///< zmqpp sockets are NOT thread-safe. Serializes concurrent publish callers Mode _mode; ///< publisher or subscriber? std::string _topic; ///< publisher topic std::vector _topics; ///< list of subscriber topics @@ -190,6 +190,7 @@ namespace Common { if ( _mode != Mode::PUB ) { throw std::runtime_error( "(Common::PubSub::publish) not a publisher" ); } + std::lock_guard lock( _publish_mtx ); // serialize the non-thread-safe socket zmqpp::message message_zmq; // Publish to either class default _topic or topic specified as // optional arg. @@ -436,9 +437,6 @@ namespace Common { /**************** Common::Broadcaster ***************************************/ - void collect_telemetry(const std::pair &provider, std::string &retstring); - - /***** Common::extract_correlation_id ***************************************/ /** * @brief detect and strip a correlation ID prefix from an inter-daemon message @@ -1269,7 +1267,8 @@ namespace Common { */ class DaemonClient { private: - std::mutex client_access; + std::recursive_mutex client_access; // recursive: command() takes it across connect+send+close + char term_write; ///< send adds this char on Writes char term_read; ///< send looks for this char on Reads (if reply requested) std::string term_str_write; ///< optional terminating string for writes diff --git a/common/db_column_defs.h b/common/db_column_defs.h new file mode 100644 index 00000000..b978fdfa --- /dev/null +++ b/common/db_column_defs.h @@ -0,0 +1,32 @@ +/** + * @file db_column_defs.h + * @brief binds JSON message keys to thermald DB column names + * + * Mirrors the pattern in fits_header_defs.h. Whenever a JSON message + * key arriving via ZMQ pub/sub needs to be persisted to a column in + * thermald.externaldata, add an entry here. The jkey value is + * arbitrary (publisher and subscriber just need to agree on the + * constant); the column field MUST match the actual DB schema. + */ +#pragma once + +#include "message_keys.h" + +namespace DbColumnDefs { + + /** + * @struct Column + * @brief binds a JSON message key to a database column name + */ + struct Column { + const char* jkey; ///< key in the inbound JSON message + const char* column; ///< database column name + }; + + const Column Columns[] = { + { Key::Acamd::TANDOR.c_str(), "TANDOR_ACAM" }, + { Key::Slicecamd::TANDOR_L.c_str(), "TANDOR_SCAM_L" }, + { Key::Slicecamd::TANDOR_R.c_str(), "TANDOR_SCAM_R" } + }; + +} diff --git a/common/fits_header_defs.h b/common/fits_header_defs.h index a439ef3d..26f8c4a7 100644 --- a/common/fits_header_defs.h +++ b/common/fits_header_defs.h @@ -83,12 +83,12 @@ namespace FitsHeaderKeys { }; const Primary TargetInfoKeys[] = { - { "OBS_ID", "", "Observation ID", "INT" }, - { "NAME", "", "target name", "STRING" }, - { "SLITA", "", "slit angle in deg", "FLOAT" }, - { "POINTMDE", "", "pointing mode", "STRING" }, - { "RA", "", "requested Right Ascension in J2000", "STRING" }, - { "DECL", "", "requested Declination in J2000", "STRING" } + { Key::TargetInfo::OBS_ID.c_str(), "", "Observation ID", "INT" }, + { Key::TargetInfo::NAME.c_str(), "", "target name", "STRING" }, + { Key::TargetInfo::SLITA.c_str(), "", "slit angle in deg", "FLOAT" }, + { Key::TargetInfo::POINTMODE.c_str(), "POINTMDE", "pointing mode", "STRING" }, + { Key::TargetInfo::RA.c_str(), "", "requested Right Ascension in J2000", "STRING" }, + { Key::TargetInfo::DECL.c_str(), "", "requested Declination in J2000", "STRING" } }; const Primary TcsInfoKeys[] = { diff --git a/common/flexured_commands.h b/common/flexured_commands.h index e3a9eb07..46b71935 100644 --- a/common/flexured_commands.h +++ b/common/flexured_commands.h @@ -27,7 +27,6 @@ const std::vector FLEXURED_SYNTAX = { FLEXURED_NATIVE+" ? | ", FLEXURED_OPEN, FLEXURED_SET+" ? | ", - TELEMREQUEST+" [ ? ]", FLEXURED_TEST+" ? | ...", " motormap", " posmap", diff --git a/common/focusd_commands.h b/common/focusd_commands.h index c6647ef9..05456b32 100644 --- a/common/focusd_commands.h +++ b/common/focusd_commands.h @@ -29,7 +29,6 @@ const std::vector FOCUSD_SYNTAX = { FOCUSD_NATIVE+" ? | ", FOCUSD_OPEN, FOCUSD_SET+" ? | { | nominal }", - TELEMREQUEST+" [ ? ]", FOCUSD_TEST+" ...", " motormap", " posmap", diff --git a/common/message_keys.h b/common/message_keys.h index 328ef249..b3017b8c 100644 --- a/common/message_keys.h +++ b/common/message_keys.h @@ -36,6 +36,7 @@ namespace Topic { inline const std::string SLITD = "slitd"; inline const std::string CAMERAD = "camerad"; inline const std::string ACAMD = "acamd"; + inline const std::string ACAMD_TEMP = "acamd_temp"; inline const std::string CALIBD = "calibd"; inline const std::string FLEXURED = "flexured"; inline const std::string FOCUSD = "focusd"; @@ -63,6 +64,17 @@ namespace Key { inline const std::string SHOULD_FINEACQUIRE = "should_fineacquire"; } + namespace TargetInfo { + inline const std::string OBS_ID = "OBS_ID"; + inline const std::string NAME = "NAME"; + inline const std::string SLITA = "SLITA"; + inline const std::string BINSPECT = "BINSPECT"; + inline const std::string BINSPAT = "BINSPAT"; + inline const std::string POINTMODE = "POINTMODE"; + inline const std::string RA = "RA"; + inline const std::string DECL = "DECL"; + } + namespace Camerad { inline const std::string READY = "ready"; inline const std::string SHUTTERTIME = "shuttime_sec"; @@ -92,6 +104,8 @@ namespace Key { namespace Slicecamd { inline const std::string FINEACQUIRE_LOCKED = "fineacquire_locked"; inline const std::string FINEACQUIRE_RUNNING = "fineacquire_running"; + inline const std::string TANDOR_L = "tandor_L"; + inline const std::string TANDOR_R = "tandor_R"; } namespace Slitd { diff --git a/common/powerd_commands.h b/common/powerd_commands.h index da8ef3fa..ae45aa67 100644 --- a/common/powerd_commands.h +++ b/common/powerd_commands.h @@ -21,7 +21,6 @@ const std::vector POWERD_SYNTAX = { POWERD_OPEN, POWERD_REOPEN+" [?]", POWERD_STATUS+" [?]", - TELEMREQUEST+" [?]", "", " [ ON | OFF | BOOT ]", " [ ON | OFF | BOOT ]" diff --git a/common/sequencerd_commands.h b/common/sequencerd_commands.h index e42d9202..b69dce30 100644 --- a/common/sequencerd_commands.h +++ b/common/sequencerd_commands.h @@ -59,7 +59,6 @@ const std::vector SEQUENCERD_SYNTAX = { SEQUENCERD_PAUSE, SEQUENCERD_REPEAT, SEQUENCERD_RESUME, - TELEMREQUEST+" [?]", SEQUENCERD_USERCONTINUE, SEQUENCERD_SHUTDOWN, SEQUENCERD_START, diff --git a/common/skyinfo.cpp b/common/skyinfo.cpp index f3c70504..31d90546 100644 --- a/common/skyinfo.cpp +++ b/common/skyinfo.cpp @@ -255,11 +255,11 @@ namespace SkyInfo { } #ifdef LOGLEVEL_DEBUG -// message.str(""); message << "[ACQUIRE] coords_out.ra=" << std::fixed << std::setprecision(6) -// << this->coords_out.ra << " .dec=" -// << this->coords_out.dec << " .angle=" -// << this->coords_out.angle << " deg"; -// logwrite( function, message.str() ); + message.str(""); message << "[ACQUIRE] coords_out.ra=" << std::fixed << std::setprecision(6) + << this->coords_out.ra << " .dec=" + << this->coords_out.dec << " .angle=" + << this->coords_out.angle << " deg"; + logwrite( function, message.str() ); #endif PyGILState_Release( gstate ); diff --git a/common/slitd_commands.h b/common/slitd_commands.h index ffe3b121..d892dea0 100644 --- a/common/slitd_commands.h +++ b/common/slitd_commands.h @@ -27,8 +27,7 @@ const std::vector SLITD_SYNTAX = { SLITD_NATIVE+" ? | [ ]", SLITD_OFFSET+" ? | ", SLITD_OPEN+" [ ? ]", - SLITD_SET+" ? | [ ]", - TELEMREQUEST+" [?]" + SLITD_SET+" ? | [ ]" }; #endif diff --git a/common/tcsd_commands.h b/common/tcsd_commands.h index 01fa1f85..a34968c6 100644 --- a/common/tcsd_commands.h +++ b/common/tcsd_commands.h @@ -56,7 +56,6 @@ const std::vector TCSD_SYNTAX = { TCSD_RETOFFSETS+" [ ? ]", TCSD_RINGGO+" ? | ", TCSD_SET_FOCUS+" ? | ", - TELEMREQUEST+" [ ? ]", TCSD_WEATHER_COORDS+" [ ? ]", TCSD_ZERO_OFFSETS+" [ ? ]" }; diff --git a/common/thermald_commands.h b/common/thermald_commands.h index 136c837a..00161418 100644 --- a/common/thermald_commands.h +++ b/common/thermald_commands.h @@ -25,7 +25,6 @@ const std::vector THERMALD_SYNTAX = { THERMALD_NATIVE+" [ ? | [] ]", THERMALD_PRINTLABELS+" [?]", THERMALD_RECONNECT+" [ ? ]", - TELEMREQUEST+" [?]", THERMALD_SETPOINT+" [ ? | [ ] ]", THERMALD_SHOWTELEM+" [ ? | force ]", THERMALD_TELEMETRY+" ? | start | stop | status " diff --git a/emulator/CMakeLists.txt b/emulator/CMakeLists.txt index 91dd386e..1f1f5c9e 100644 --- a/emulator/CMakeLists.txt +++ b/emulator/CMakeLists.txt @@ -10,7 +10,7 @@ set( EMULATOR_DIR ${PROJECT_BASE_DIR}/emulator ) set( CMAKE_CXX_STANDARD 17 ) -add_definitions( -Wall -ansi -O0 -Wno-variadic-macros ) +add_definitions( -Wall -O0 -Wno-variadic-macros ) include_directories( ${EMULATOR_DIR} ) include_directories( ${PROJECT_BASE_DIR}/utils ) diff --git a/emulator/emulatord_acam.cpp b/emulator/emulatord_acam.cpp index 356529a3..cea24f58 100644 --- a/emulator/emulatord_acam.cpp +++ b/emulator/emulatord_acam.cpp @@ -46,7 +46,7 @@ void signal_handler( int signo ) { int main( int argc, char **argv ); // main thread (just gets things started) void block_main( Network::TcpSocket sock ); // this thread handles requests on blocking port -void doit( Network::TcpSocket sock ); // the worker thread +void doit( Network::TcpSocket &sock ); // the worker thread /***** main *******************************************************************/ @@ -209,7 +209,7 @@ void block_main( Network::TcpSocket sock ) { * [all|] [_BLOCK_] [] * */ -void doit( Network::TcpSocket sock ) { +void doit( Network::TcpSocket &sock ) { std::string function = " (AcamEmulator::doit) "; long ret; std::stringstream message; diff --git a/emulator/emulatord_andorserver.cpp b/emulator/emulatord_andorserver.cpp index 3ed64356..1636554a 100644 --- a/emulator/emulatord_andorserver.cpp +++ b/emulator/emulatord_andorserver.cpp @@ -46,7 +46,7 @@ void signal_handler( int signo ) { int main( int argc, char **argv ); // main thread (just gets things started) void block_main( Network::TcpSocket sock ); // this thread handles requests on blocking port -void doit( Network::TcpSocket sock ); // the worker thread +void doit( Network::TcpSocket &sock ); // the worker thread /***** main *******************************************************************/ @@ -201,7 +201,7 @@ void block_main( Network::TcpSocket sock ) { * [all|] [_BLOCK_] [] * */ -void doit( Network::TcpSocket sock ) { +void doit( Network::TcpSocket &sock ) { std::string function = " (AndorServerEmulator::doit) "; long ret; std::stringstream message; diff --git a/emulator/emulatord_calib.cpp b/emulator/emulatord_calib.cpp index 5056a6ec..7670b3cd 100644 --- a/emulator/emulatord_calib.cpp +++ b/emulator/emulatord_calib.cpp @@ -46,7 +46,7 @@ void signal_handler( int signo ) { int main( int argc, char **argv ); ///< main thread (just gets things started) void block_main( Network::TcpSocket sock ); ///< this thread handles requests on blocking port -void doit( Network::TcpSocket sock ); ///< the worker thread +void doit( Network::TcpSocket &sock ); ///< the worker thread /***** main *******************************************************************/ @@ -203,7 +203,7 @@ void block_main( Network::TcpSocket sock ) { * [all|] [_BLOCK_] [] * */ -void doit( Network::TcpSocket sock ) { +void doit( Network::TcpSocket &sock ) { std::string function = " (CalibEmulator::doit) "; long ret; std::stringstream message; diff --git a/emulator/emulatord_camera.cpp b/emulator/emulatord_camera.cpp index f15ba6cc..4c5bb0ae 100644 --- a/emulator/emulatord_camera.cpp +++ b/emulator/emulatord_camera.cpp @@ -46,7 +46,7 @@ void signal_handler( int signo ) { int main( int argc, char **argv ); // main thread (just gets things started) void block_main( Network::TcpSocket sock ); // this thread handles requests on blocking port -void doit( Network::TcpSocket sock ); // the worker thread +void doit( Network::TcpSocket &sock ); // the worker thread /** main *********************************************************************/ @@ -200,7 +200,7 @@ void block_main( Network::TcpSocket sock ) { * [all|] [_BLOCK_] [] * */ -void doit( Network::TcpSocket sock ) { +void doit( Network::TcpSocket &sock ) { std::string function = " (Emulator::doit) "; long ret; std::stringstream message; diff --git a/emulator/emulatord_filter.cpp b/emulator/emulatord_filter.cpp index ed525942..309bce03 100644 --- a/emulator/emulatord_filter.cpp +++ b/emulator/emulatord_filter.cpp @@ -46,7 +46,7 @@ void signal_handler( int signo ) { int main( int argc, char **argv ); // main thread (just gets things started) void block_main( Network::TcpSocket sock ); // this thread handles requests on blocking port -void doit( Network::TcpSocket sock ); // the worker thread +void doit( Network::TcpSocket &sock ); // the worker thread /** main *********************************************************************/ @@ -200,7 +200,7 @@ void block_main( Network::TcpSocket sock ) { * [all|] [_BLOCK_] [] * */ -void doit( Network::TcpSocket sock ) { +void doit( Network::TcpSocket &sock ) { std::string function = " (Emulator::doit) "; long ret; std::stringstream message; diff --git a/emulator/emulatord_flexure.cpp b/emulator/emulatord_flexure.cpp index b5c7ce58..026000cb 100644 --- a/emulator/emulatord_flexure.cpp +++ b/emulator/emulatord_flexure.cpp @@ -50,7 +50,7 @@ void signal_handler( int signo ) { int main( int argc, char **argv ); // main thread (just gets things started) void block_main( Network::TcpSocket sock ); // this thread handles requests on blocking port -void doit( Network::TcpSocket sock ); // the worker thread +void doit( Network::TcpSocket &sock ); // the worker thread /** main *********************************************************************/ @@ -211,7 +211,7 @@ void block_main(Network::TcpSocket sock) { * [all|] [_BLOCK_] [] * */ -void doit(Network::TcpSocket sock) { +void doit(Network::TcpSocket &sock) { std::string function = " (Emulator::doit) "; char buf[BUFSIZE+1]; long ret; diff --git a/emulator/emulatord_focus.cpp b/emulator/emulatord_focus.cpp index 30a52c49..7c6e8251 100644 --- a/emulator/emulatord_focus.cpp +++ b/emulator/emulatord_focus.cpp @@ -46,7 +46,7 @@ void signal_handler( int signo ) { int main( int argc, char **argv ); // main thread (just gets things started) void block_main( Network::TcpSocket sock ); // this thread handles requests on blocking port -void doit( Network::TcpSocket sock ); // the worker thread +void doit( Network::TcpSocket &sock ); // the worker thread /** main *********************************************************************/ @@ -204,7 +204,7 @@ void block_main( Network::TcpSocket sock ) { * [all|] [_BLOCK_] [] * */ -void doit( Network::TcpSocket sock ) { +void doit( Network::TcpSocket &sock ) { std::string function = " (Emulator::doit) "; long ret; std::stringstream message; diff --git a/emulator/emulatord_power.cpp b/emulator/emulatord_power.cpp index 91611d99..f3e71e58 100644 --- a/emulator/emulatord_power.cpp +++ b/emulator/emulatord_power.cpp @@ -46,7 +46,7 @@ void signal_handler( int signo ) { int main( int argc, char **argv ); // main thread (just gets things started) void block_main( Network::TcpSocket sock ); // this thread handles requests on blocking port -void doit( Network::TcpSocket sock ); // the worker thread +void doit( Network::TcpSocket &sock ); // the worker thread /***** main ******************************************************************/ @@ -209,7 +209,7 @@ void block_main( Network::TcpSocket sock ) { * [all|] [_BLOCK_] [] * */ -void doit( Network::TcpSocket sock ) { +void doit( Network::TcpSocket &sock ) { std::string function = " (PowerEmulator::doit) "; long ret; std::stringstream message; diff --git a/emulator/emulatord_slit.cpp b/emulator/emulatord_slit.cpp index 2aedffd0..86b19574 100644 --- a/emulator/emulatord_slit.cpp +++ b/emulator/emulatord_slit.cpp @@ -46,7 +46,7 @@ void signal_handler( int signo ) { int main( int argc, char **argv ); // main thread (just gets things started) void block_main( Network::TcpSocket sock ); // this thread handles requests on blocking port -void doit( Network::TcpSocket sock ); // the worker thread +void doit( Network::TcpSocket &sock ); // the worker thread /***** main *******************************************************************/ @@ -216,7 +216,7 @@ void block_main( Network::TcpSocket sock ) { * [all|] [_BLOCK_] [] * */ -void doit( Network::TcpSocket sock ) { +void doit( Network::TcpSocket &sock ) { std::string function = " (SlitEmulator::doit) "; long ret; std::stringstream message; diff --git a/emulator/emulatord_tcs.h b/emulator/emulatord_tcs.h index 71ff13fa..6c55c821 100644 --- a/emulator/emulatord_tcs.h +++ b/emulator/emulatord_tcs.h @@ -68,7 +68,7 @@ namespace TcsEmulator { TcsEmulator::Interface interface; ///< create an emulater interface static void block_main( TcsEmulator::Server &server, Network::TcpSocket sock ); ///< main function for blocking connection thread - static void doit(Network::TcpSocket sock); ///< the workhorse of each thread connetion + static void doit(Network::TcpSocket &sock); ///< the workhorse of each thread connetion void exit_cleanly(); inline void initialize_python_objects() { ///< allows for initializing Python objects by the child process diff --git a/emulator/tcs.cpp b/emulator/tcs.cpp index 7ea844e7..4c535e5f 100644 --- a/emulator/tcs.cpp +++ b/emulator/tcs.cpp @@ -97,7 +97,7 @@ namespace TcsEmulator { * [all|] [_BLOCK_] [] * */ - void Server::doit( Network::TcpSocket sock ) { + void Server::doit( Network::TcpSocket &sock ) { std::string function = " (TcsEmulator::Server::doit) "; long ret; std::stringstream message; @@ -1119,6 +1119,10 @@ namespace TcsEmulator { } } else + if ( mycmd == "Z" ) { + retstring = "0"; // successful completion + } + else if ( mycmd == "RET" ) { retstring = "0"; // successful completion } diff --git a/flexured/flexure_interface.cpp b/flexured/flexure_interface.cpp index 626eaa9c..e44c036f 100644 --- a/flexured/flexure_interface.cpp +++ b/flexured/flexure_interface.cpp @@ -322,10 +322,6 @@ namespace Flexure { return HELP; } - // get the needed telemetry (telescope position and temperatures) - // - this->get_external_telemetry(); - // perform the calculations // retstring="not_yet_implemented"; @@ -390,164 +386,93 @@ namespace Flexure { /***** Flexure::Interface::send_command *************************************/ - /***** Flexure::Interface::make_telemetry_message ***************************/ + /***** Flexure::Interface::get_status **************************************/ /** - * @brief assembles a telemetry message - * @details This creates a JSON message for telemetry info, then serializes - * it into a std::string ready to be sent over a socket. - * @param[out] retstring string containing the serialization of the JSON message + * @brief read all flexure actuator positions into status * */ - void Interface::make_telemetry_message( std::string &retstring ) { - const std::string function="Flexure::Interface::make_telemetry_message"; + void Interface::get_status() { + const std::string function="Flexure::Interface::get_status"; std::stringstream message; - // assemble the telemetry into a json message - // Set a messagetype keyword to indicate what kind of message this is. - // - nlohmann::json jmessage; - jmessage["messagetype"]="flexureinfo"; + this->status.positions.clear(); - // get all flexure actuator positions - // auto _motormap = this->motorinterface.get_motormap(); - - // loop through all motors in motormap for ( const auto &mot : _motormap ) { - // loop through all axes for each motor + bool connected = this->motorinterface.is_connected( mot.second.name ); for ( const auto &axis : mot.second.axes ) { auto chan = mot.second.name; auto addr = mot.second.addr; float position = NAN; std::string posname; std::string key; - this->motorinterface.get_pos( chan, axis.second.axisnum, addr, position, posname ); + if ( connected ) { + this->motorinterface.get_pos( chan, axis.second.axisnum, addr, position, posname ); + } switch ( axis.second.axisnum ) { case 1 : key = "FLXPIS_" + chan; break; - case 2: key = "FLXSPE_" + chan; break; - case 3: key = "FLXSPA_" + chan; break; - default: key = "error"; - message.str(""); message << "ERROR unknown axis " << axis.second.axisnum; + case 2 : key = "FLXSPE_" + chan; break; + case 3 : key = "FLXSPA_" + chan; break; + default: message.str(""); message << "ERROR unknown axis " << axis.second.axisnum; logwrite( function, message.str() ); + continue; } - - // assign the position or NaN to a key in the JSON jmessage - // - if ( !std::isnan(position) ) jmessage[key]=position; else jmessage[key]="NAN"; + this->status.positions[key] = position; } } - - retstring = jmessage.dump(); // serialize the json message into retstring - - retstring.append(JEOF); // append the JSON message terminator - - return; } - /***** Flexure::Interface::make_telemetry_message ***************************/ + /***** Flexure::Interface::get_status **************************************/ - /***** Flexure::Interface::get_external_telemetry ***************************/ + /***** Flexure::Interface::publish_status **********************************/ /** - * @brief collect telemetry from another daemon - * @details This is used for any telemetry that I need to collect from - * another daemon. Send the command "sendtelem" to the daemon, which - * will respond with a JSON message. The daemon(s) to contact - * are configured with the TELEM_PROVIDER key in the config file. + * @brief publish flexure state, but only if it changed (or forced) + * @param[in] force optional (default=false) publish irrespective of change * */ - void Interface::get_external_telemetry() { + void Interface::publish_status( bool force ) { + // Serialize publish-on-change; held across get_status() hardware I/O for now @TODO revisit + std::lock_guard lock( this->publish_mutex ); - // Loop through each configured telemetry provider. This requests - // their telemetry which is returned as a serialized json string - // held in retstring. + // refresh current state from hardware // - // handle_json_message() will parse the serialized json string. + this->get_status(); + + // unless forced, only publish if the state changed // - std::string retstring; - for ( const auto &provider : this->telemetry_providers ) { - Common::collect_telemetry( provider, retstring ); - handle_json_message(retstring); + if ( !force && this->status == this->last_published_status ) return; + + nlohmann::json jmessage; + jmessage[Key::SOURCE] = Topic::FLEXURED; + for ( const auto &[key,pos] : this->status.positions ) { + if ( !std::isnan(pos) ) jmessage[key] = pos; else jmessage[key] = "NAN"; + } + + this->last_published_status = this->status; + + try { + this->publisher->publish( jmessage, Topic::FLEXURED ); + } + catch( const std::exception &e ) { + logwrite( "Flexure::Interface::publish_status", + "ERROR publishing message: "+std::string(e.what()) ); } - return; } - /***** Flexure::Interface::get_external_telemetry ***************************/ + /***** Flexure::Interface::publish_status **********************************/ - /***** Flexure::Interface::handle_json_message ******************************/ + /***** Flexure::Interface::handletopic_snapshot ****************************/ /** - * @brief parses incoming telemetry messages - * @details Requesting telemetry from another daemon returns a serialized - * JSON message which needs to be passed in here to parse it. - * @param[in] message_in incoming serialized JSON message (as a string) - * @return ERROR | NO_ERROR + * @brief respond to a snapshot request by publishing my status + * @param[in] jmessage subscribed-received JSON message * */ - long Interface::handle_json_message( std::string message_in ) { - const std::string function="Flexure::Interface::handle_json_message"; - std::stringstream message; - - try { - nlohmann::json jmessage = nlohmann::json::parse( message_in ); - std::string messagetype; - - // jmessage must not contain key "error" and must contain key "messagetype" - // - if ( !jmessage.contains("error") ) { - if ( jmessage.contains("messagetype") && jmessage["messagetype"].is_string() ) { - messagetype = jmessage["messagetype"]; - } - else { - logwrite( function, "ERROR received JSON message with missing or invalid messagetype" ); - return ERROR; - } - } - else { - logwrite( function, "ERROR in JSON message" ); - return ERROR; - } - - // no errors, so disseminate the message contents based on the message type - // - if ( messagetype == "thermalinfo" ) { - double TCOLL_I=NAN, TCOLL_R=NAN, TCOLL_G=NAN, TCOLL_U=NAN; - Common::extract_telemetry_value( message_in, "TCOLL_I", TCOLL_I ); - Common::extract_telemetry_value( message_in, "TCOLL_R", TCOLL_R ); - Common::extract_telemetry_value( message_in, "TCOLL_G", TCOLL_G ); - Common::extract_telemetry_value( message_in, "TCOLL_U", TCOLL_U ); - message.str(""); message << "TCOLL_I=" << TCOLL_I << " TCOLL_R=" << TCOLL_R << " TCOLL_G=" << TCOLL_G << " TCOLL_U=" << TCOLL_U; - logwrite( function, message.str() ); - } - else - if ( messagetype == "tcsinfo" ) { - double casangle=NAN, alt=NAN; - Common::extract_telemetry_value( message_in, Key::Tcsd::CASANGLE, casangle ); - Common::extract_telemetry_value( message_in, Key::Tcsd::ALT, alt ); - message.str(""); message << "casangle=" << casangle << " alt=" << alt; - logwrite( function, message.str() ); - } - else - if ( messagetype == "test" ) { - } - else { - message.str(""); message << "ERROR received unhandled JSON message type \"" << messagetype << "\""; - logwrite( function, message.str() ); - return ERROR; - } - } - catch ( const nlohmann::json::parse_error &e ) { - message.str(""); message << "ERROR json exception parsing message: " << e.what(); - logwrite( function, message.str() ); - return ERROR; + void Interface::handletopic_snapshot( const nlohmann::json &jmessage ) { + if ( jmessage.contains( Topic::FLEXURED ) ) { + this->publish_status(); } - catch ( const std::exception &e ) { - message.str(""); message << "ERROR parsing message: " << e.what(); - logwrite( function, message.str() ); - return ERROR; - } - - return NO_ERROR; } - /***** Flexure::Interface::handle_json_message ******************************/ + /***** Flexure::Interface::handletopic_snapshot ****************************/ /***** Flexure::Interface::test *********************************************/ diff --git a/flexured/flexure_interface.h b/flexured/flexure_interface.h index 291f22e9..0b72a76c 100644 --- a/flexured/flexure_interface.h +++ b/flexured/flexure_interface.h @@ -17,6 +17,7 @@ #include #include #include +#include #define FLEXURE_MOVE_TIMEOUT 1000 ///< timeout in msec for moves #define FLEXURE_POSNAME_TOLERANCE 0.0001 ///< tolerance to determine posname from position @@ -42,13 +43,71 @@ namespace Flexure { */ class Interface { private: + zmqpp::context context; size_t numdev; bool class_initialized; - public: - Interface() : numdev(-1), motorinterface( FLEXURE_MOVE_TIMEOUT, 0, FLEXURE_POSNAME_TOLERANCE ) {} + /** + * @struct Status + * @brief published flexure state: actuator position (um) by FLX_ key; NaN if unavailable + */ + struct Status { + std::map positions; + bool operator==(const Status &o) const { + if ( positions.size() != o.positions.size() ) return false; + for ( const auto &[k,v] : positions ) { + auto it = o.positions.find(k); + if ( it == o.positions.end() ) return false; + if ( std::isnan(v) && std::isnan(it->second) ) continue; // NaN==NaN treated equal + if ( v != it->second ) return false; + } + return true; + } + bool operator!=(const Status &o) const { return !(*this == o); } + }; + Status status; ///< current flexure state + Status last_published_status; ///< last published flexure state + std::mutex publish_mutex; ///< serializes publish-on-change; held over get_status() — @TODO revisit + + public: - std::map telemetry_providers; ///< map of port[daemon_name] for external telemetry providers + Interface() + : context(), + numdev(-1), + is_subscriber_thread_running(false), + should_subscriber_thread_run(false), + motorinterface( FLEXURE_MOVE_TIMEOUT, 0, FLEXURE_POSNAME_TOLERANCE ) + { + topic_handlers = { + { Topic::SNAPSHOT, std::function( + [this](const nlohmann::json &msg) { handletopic_snapshot(msg); } ) } + }; + } + + std::unique_ptr publisher; ///< publisher object + std::string publisher_address; ///< publish socket endpoint + std::string publisher_topic; ///< my default topic for publishing + std::unique_ptr subscriber; ///< subscriber object + std::string subscriber_address; ///< subscribe socket endpoint + std::vector subscriber_topics; ///< list of topics I subscribe to + std::atomic is_subscriber_thread_running; ///< is my subscriber thread running? + std::atomic should_subscriber_thread_run; ///< should my subscriber thread run? + std::unordered_map> topic_handlers; + ///< maps a handler function to each topic + + long init_pubsub(const std::initializer_list &topics={}) { + if (!subscriber) { + subscriber = std::make_unique(context, Common::PubSub::Mode::SUB); + } + return Common::PubSubHandler::init_pubsub(context, *this, topics); + } + void start_subscriber_thread() { Common::PubSubHandler::start_subscriber_thread(*this); } + void stop_subscriber_thread() { Common::PubSubHandler::stop_subscriber_thread(*this); } + + void handletopic_snapshot( const nlohmann::json &jmessage ); ///< respond to a snapshot request + void get_status(); ///< refresh status from hardware + void publish_status( bool force=false ); ///< publish flexure state on change (or force) Common::Queue async; @@ -68,9 +127,6 @@ namespace Flexure { long stop(); ///< send the stop-all-motion command to all controllers long send_command( const std::string &name, std::string cmd ); ///< writes the raw command as received to the master controller, no reply long send_command( const std::string &name, std::string cmd, std::string &retstring ); ///< writes command?, reads reply - void make_telemetry_message( std::string &retstring ); ///< assembles a telemetry message - void get_external_telemetry(); ///< collect telemetry from other daemon(s) - long handle_json_message( std::string message_in ); ///< parses incoming telemetry messages long test( std::string args, std::string &retstring ); ///< test routines std::mutex pi_mutex; ///< mutex to protect multi-threaded access to PI controller diff --git a/flexured/flexure_server.cpp b/flexured/flexure_server.cpp index e3be7f59..dcb4b6ad 100644 --- a/flexured/flexure_server.cpp +++ b/flexured/flexure_server.cpp @@ -17,6 +17,9 @@ namespace Flexure { */ void Server::exit_cleanly(void) { std::string function = "Flexure::Server::exit_cleanly"; + + this->interface.stop_subscriber_thread(); + logwrite( function, "exiting" ); exit(EXIT_SUCCESS); @@ -116,6 +119,28 @@ namespace Flexure { applied++; } + // PUB_ENDPOINT -- my ZeroMQ socket endpoint for publishing telemetry + // SUB_ENDPOINT -- the broker endpoint I subscribe to (for snapshot requests) + // + // NOTE: these two keys must be present in the flexured config file for + // publishing to work. Without PUB_ENDPOINT, init_pubsub() fails and + // no telemetry is published on Topic::FLEXURED. + // + if ( config.param[entry] == "PUB_ENDPOINT" ) { + this->interface.publisher_address = config.arg[entry]; + this->interface.publisher_topic = DAEMON_NAME; // default publish topic is my name + message.str(""); message << "FLEXURED:config:" << config.param[entry] << "=" << config.arg[entry]; + this->interface.async.enqueue_and_log( function, message.str() ); + applied++; + } + + if ( config.param[entry] == "SUB_ENDPOINT" ) { + this->interface.subscriber_address = config.arg[entry]; + message.str(""); message << "FLEXURED:config:" << config.param[entry] << "=" << config.arg[entry]; + this->interface.async.enqueue_and_log( function, message.str() ); + applied++; + } + // MOTOR_CONTROLLER -- address and name of each PI motor controller in daisy-chain // Each CONTROLLER is stored in an STL map indexed by motorname // @@ -161,31 +186,6 @@ namespace Flexure { } } - // TELEM_PROVIDER : contains daemon name and port to contact for header telemetry info - // - if ( config.param[entry] == "TELEM_PROVIDER" ) { - std::vector tokens; - Tokenize( config.arg[entry], tokens, " " ); - try { - if ( tokens.size() == 2 ) { - this->interface.telemetry_providers[tokens.at(0)] = std::stod(tokens.at(1)); - } - else { - message.str(""); message << "ERROR bad format TELEM_PROVIDER=\"" << config.arg[entry] << "\": expected "; - logwrite( function, message.str() ); - return ERROR; - } - } - catch ( const std::exception &e ) { - message.str(""); message << "ERROR parsing TELEM_PROVIDER from " << config.arg[entry] << ": " << e.what(); - logwrite( function, message.str() ); - return ERROR; - } - message.str(""); message << "config:" << config.param[entry] << "=" << config.arg[entry]; - this->interface.async.enqueue_and_log( to_uppercase(DAEMON_NAME), function, message.str() ); - applied++; - } - } // end loop through the entries in the configuration file message.str(""); @@ -222,7 +222,14 @@ namespace Flexure { auto newlogtime = next_occurrence( 12, 01, 00 ); std::this_thread::sleep_until( newlogtime ); close_log(); - init_log( logpath, Flexure::DAEMON_NAME ); + // retry the re-open on a short timer so a transient failure (missing + // datedir, permission/owner drift, full disk) doesn't silence logging + // for ~24h until the next rotation + while ( init_log( logpath, Flexure::DAEMON_NAME ) != 0 ) { + std::cerr << get_timestamp() << " (Flexure::Server::new_log_day) " + << "ERROR: log rotation failed to open new logfile; retrying in 60s\n"; + std::this_thread::sleep_for( std::chrono::seconds(60) ); + } // ensure it doesn't immediately re-open std::this_thread::sleep_for( std::chrono::seconds(1) ); } @@ -543,22 +550,6 @@ namespace Flexure { } else - // send telemetry upon request - // - if ( cmd == TELEMREQUEST ) { - if ( args=="?" || args=="help" ) { - retstring=TELEMREQUEST+"\n"; - retstring.append( " Returns a serialized JSON message containing telemetry\n" ); - retstring.append( " information, terminated with \"EOF\\n\".\n" ); - ret=HELP; - } - else { - this->interface.make_telemetry_message( retstring ); - ret = JSON; - } - } - else - // test routines // if ( cmd == FLEXURED_TEST ) { @@ -611,6 +602,8 @@ namespace Flexure { if ( sock.Write( retstring ) < 0 ) connection_open=false; } + if ( ret==NO_ERROR ) this->interface.publish_status(); + if (!sock.isblocking()) break; // Non-blocking connection exits immediately. // Keep blocking connection open for interactive session. } diff --git a/flexured/flexured.cpp b/flexured/flexured.cpp index f78cefb6..8ee6fd6a 100644 --- a/flexured/flexured.cpp +++ b/flexured/flexured.cpp @@ -122,6 +122,18 @@ int main(int argc, char **argv) { flexured.exit_cleanly(); } + // initialize the pub/sub handler + // + if ( flexured.interface.init_pubsub() == ERROR ) { + logwrite(function, "ERROR initializing publisher-subscriber handler"); + flexured.exit_cleanly(); + } + std::this_thread::sleep_for(std::chrono::milliseconds(250)); + + // publish current state so the world knows I'm online + // + flexured.interface.publish_status( true ); + // This will pre-thread N_THREADS threads. // The 0th thread is reserved for the blocking port, and the rest are for the non-blocking port. // Each thread gets a socket object. All of the socket objects are stored in a vector container. diff --git a/focusd/focus_interface.cpp b/focusd/focus_interface.cpp index 0c735641..67c48e05 100644 --- a/focusd/focus_interface.cpp +++ b/focusd/focus_interface.cpp @@ -537,25 +537,13 @@ namespace Focus { /***** Focus::Interface::send_command ***************************************/ - /***** Focus::Interface::make_telemetry_message *****************************/ + /***** Focus::Interface::get_status *****************************************/ /** - * @brief assembles a telemetry message - * @details This creates a JSON message for telemetry info, then serializes - * it into a std::string ready to be sent over a socket. - * @param[out] retstring string containing the serialization of the JSON message + * @brief read current focus positions into status * */ - void Interface::make_telemetry_message( std::string &retstring ) { - const std::string function="Focus::Interface::make_telemetry_message"; - - // assemble the telemetry into a json message - // Set a messagetype keyword to indicate what kind of message this is. - // - nlohmann::json jmessage; - jmessage["messagetype"]="focusinfo"; - - // get focus position for each motor - // + void Interface::get_status() { + this->status.positions.clear(); auto _motormap = this->motorinterface.get_motormap(); for ( const auto &mot : _motormap ) { auto name = mot.second.name; @@ -563,34 +551,55 @@ namespace Focus { auto addr = mot.second.addr; float position = NAN; std::string posname; - this->motorinterface.get_pos( name, axis, addr, position, posname ); + if ( this->motorinterface.is_connected( name ) ) { + this->motorinterface.get_pos( name, axis, addr, position, posname ); + } + this->status.positions[ "FOCUS"+mot.first ] = position; + } + } + /***** Focus::Interface::get_status *****************************************/ - std::string key = "FOCUS" + mot.first; - // assign the position or NaN to a key in the JSON jmessage - // - if ( !std::isnan(position) ) jmessage[key]=position; else jmessage[key]="NAN"; - } + /***** Focus::Interface::publish_status ***********************************/ + /** + * @brief publish focus state, but only if it changed (or forced) + * @param[in] force optional (default=false) publish irrespective of change + * + */ + void Interface::publish_status( bool force ) { + // Serialize publish-on-change; held across get_status() hardware I/O for now @TODO revisit + std::lock_guard lock( this->publish_mutex ); - retstring = jmessage.dump(); // serialize the json message into retstring + // refresh current state from hardware + // + this->get_status(); - this->publisher->publish(retstring); + // unless forced, only publish if the state changed + // + if ( !force && this->status == this->last_published_status ) return; + + nlohmann::json jmessage; + jmessage[Key::SOURCE] = Topic::FOCUSD; + for ( const auto &[key,pos] : this->status.positions ) { + if ( !std::isnan(pos) ) jmessage[key] = pos; else jmessage[key] = "NAN"; + } - retstring.append(JEOF); // append the JSON message terminator + this->last_published_status = this->status; - return; + try { + this->publisher->publish( jmessage ); + } + catch( const std::exception &e ) { + logwrite( "Focus::Interface::publish_status", + "ERROR publishing message: "+std::string(e.what()) ); + } } - /***** Focus::Interface::make_telemetry_message *****************************/ + /***** Focus::Interface::publish_status ***********************************/ void Interface::handletopic_snapshot( const nlohmann::json &jmessage ) { - if ( jmessage.contains( Focus::DAEMON_NAME ) ) { - std::string dontcare; - this->make_telemetry_message(dontcare); - } - else - if ( jmessage.contains( "test" ) ) { - logwrite( "Focusd::Interface::handletopic_snapshot", jmessage.dump() ); + if ( jmessage.contains( Topic::FOCUSD ) ) { + this->publish_status(); } } diff --git a/focusd/focus_interface.h b/focusd/focus_interface.h index 5bc6183e..931400d3 100644 --- a/focusd/focus_interface.h +++ b/focusd/focus_interface.h @@ -17,6 +17,7 @@ #include #include #include +#include #define FOCUS_MOVE_TIMEOUT 5000 ///< timeout in msec for moves #define FOCUS_HOME_TIMEOUT 5000 ///< timeout in msec for home @@ -46,6 +47,31 @@ namespace Focus { zmqpp::context context; size_t numdev; bool class_initialized; + + /** + * @struct Status + * @brief published focus state: focus position (mm) by channel; NaN if unavailable + */ + struct Status { + std::map positions; + bool operator==(const Status &o) const { + if ( positions.size() != o.positions.size() ) return false; + for ( const auto &[k,v] : positions ) { + auto it = o.positions.find(k); + if ( it == o.positions.end() ) return false; + if ( std::isnan(v) && std::isnan(it->second) ) continue; // NaN==NaN treated equal + if ( v != it->second ) return false; + } + return true; + } + bool operator!=(const Status &o) const { return !(*this == o); } + }; + Status status; ///< current focus state + Status last_published_status; ///< last published focus state + std::mutex publish_mutex; ///< serializes publish-on-change; @TODO revisit + + void get_status(); ///< refresh status from hardware + public: Interface() : context(), @@ -56,7 +82,7 @@ namespace Focus { should_subscriber_thread_run(false) { topic_handlers = { - { "_snapshot", std::function( + { Topic::SNAPSHOT, std::function( [this](const nlohmann::json &msg) { handletopic_snapshot(msg); } ) } }; } @@ -107,7 +133,7 @@ namespace Focus { long stop(); ///< send the stop-all-motion command to all controllers long send_command( const std::string &name, std::string cmd ); ///< writes the raw command as received to the master controller, no reply long send_command( const std::string &name, std::string cmd, std::string &retstring ); ///< writes command?, reads reply - void make_telemetry_message( std::string &retstring ); ///< assembles a telemetry message + void publish_status( bool force=false ); ///< publish focus state on change (or force) long test( std::string args, std::string &retstring ); diff --git a/focusd/focus_server.cpp b/focusd/focus_server.cpp index 0a8b4f1b..18bf9732 100644 --- a/focusd/focus_server.cpp +++ b/focusd/focus_server.cpp @@ -284,7 +284,14 @@ namespace Focus { auto newlogtime = next_occurrence( 12, 01, 00 ); std::this_thread::sleep_until( newlogtime ); close_log(); - init_log( logpath, Focus::DAEMON_NAME ); + // retry the re-open on a short timer so a transient failure (missing + // datedir, permission/owner drift, full disk) doesn't silence logging + // for ~24h until the next rotation + while ( init_log( logpath, Focus::DAEMON_NAME ) != 0 ) { + std::cerr << get_timestamp() << " (Focus::Server::new_log_day) " + << "ERROR: log rotation failed to open new logfile; retrying in 60s\n"; + std::this_thread::sleep_for( std::chrono::seconds(60) ); + } // ensure it doesn't immediately re-open std::this_thread::sleep_for( std::chrono::seconds(1) ); } @@ -403,7 +410,7 @@ namespace Focus { * Valid commands are listed in acamd_commands.h * */ - void Server::doit(Network::TcpSocket sock) { + void Server::doit(Network::TcpSocket &sock) { std::string function = "Focus::Server::doit"; long ret; std::stringstream message; @@ -616,22 +623,6 @@ namespace Focus { } else - // send telemetry upon request - // - if ( cmd == TELEMREQUEST ) { - if ( args=="?" || args=="help" ) { - retstring=TELEMREQUEST+"\n"; - retstring.append( " Returns a serialized JSON message containing telemetry\n" ); - retstring.append( " information, terminated with \"EOF\\n\".\n" ); - ret=HELP; - } - else { - this->interface.make_telemetry_message( retstring ); - ret = JSON; - } - } - else - // test routines // if ( cmd == FOCUSD_TEST ) { @@ -684,6 +675,8 @@ namespace Focus { if ( sock.Write( retstring ) < 0 ) connection_open=false; } + if ( ret==NO_ERROR ) this->interface.publish_status(); + if (!sock.isblocking()) break; // Non-blocking connection exits immediately. // Keep blocking connection open for interactive session. } diff --git a/focusd/focus_server.h b/focusd/focus_server.h index 441afd0e..03d1b807 100644 --- a/focusd/focus_server.h +++ b/focusd/focus_server.h @@ -96,7 +96,7 @@ namespace Focus { void exit_cleanly(void); ///< exit long configure_focusd(); ///< read and apply the configuration file - void doit(Network::TcpSocket sock); ///< the workhorse of each thread connetion + void doit(Network::TcpSocket &sock); ///< the workhorse of each thread connetion void handle_signal( int signo ); diff --git a/focusd/focusd.cpp b/focusd/focusd.cpp index 4926a650..321aa45d 100644 --- a/focusd/focusd.cpp +++ b/focusd/focusd.cpp @@ -126,6 +126,10 @@ int main(int argc, char **argv) { logwrite(function, "ERROR initializing publisher-subscriber handler"); focusd.exit_cleanly(); } + std::this_thread::sleep_for(std::chrono::milliseconds(250)); + + // read current state and force-publish so the world knows I'm online + focusd.interface.publish_status( true ); // This will pre-thread N_THREADS threads. // The 0th thread is reserved for the blocking port, and the rest are for the non-blocking port. diff --git a/powerd/power_interface.cpp b/powerd/power_interface.cpp index bfa2dfdb..f63be175 100644 --- a/powerd/power_interface.cpp +++ b/powerd/power_interface.cpp @@ -286,15 +286,15 @@ namespace Power { /***** Power::Interface::list ***********************************************/ - /***** Power::Interface::status *********************************************/ + /***** Power::Interface::get_status *****************************************/ /** * @brief list status of all plug devices * @param[out] retstring reference to string to contain the status of plug devices * @return ERROR | NO_ERROR | HELP * */ - long Interface::status( std::string args, std::string &retstring ) { - std::string function = "Power::Interface::status"; + long Interface::get_status( std::string args, std::string &retstring ) { + std::string function = "Power::Interface::get_status"; std::stringstream message, plugid; // Help @@ -344,7 +344,7 @@ namespace Power { message << plugid.str() << " " << status_string << " " << this->plugname[ plugid.str() ] << "\n"; - this->telemetry_map[this->plugname[plugid.str()]] = status; + this->status.plugstate[this->plugname[plugid.str()]] = status; } } message << this->missing; // notify of missing hardware, if any @@ -357,9 +357,13 @@ namespace Power { retstring = message.str(); + // status has been refreshed from hardware; publish if it changed + // + this->publish_status(); + return NO_ERROR; } - /***** Power::Interface::status *********************************************/ + /***** Power::Interface::get_status *****************************************/ /***** Power::Interface::command ********************************************/ @@ -580,6 +584,15 @@ namespace Power { return( ERROR ); break; } + + // After a successful set (ON/OFF/BOOT), re-read state which refreshes the + // status struct and publishes the change. Reads (command==-1) don't change state. + // + if ( command >= 0 && error == NO_ERROR ) { + std::string dontcare; + this->get_status( "", dontcare ); + } + return( error ); } catch ( const std::exception &e ) { @@ -591,62 +604,48 @@ namespace Power { /***** Power::Interface::command ********************************************/ - /***** Power::Interface::publish_snapshot ***********************************/ + /***** Power::Interface::publish_status *************************************/ /** - * @brief assembles a telemetry message - * @details This creates a JSON message for my telemetry info, then serializes - * it into a std::string ready to be sent over a socket. - * @param[out] retstring string containing the serialization of the JSON message + * @brief publish the power state, but only if it changed (or forced) + * @param[in] force optional (default=false) publish irrespective of change * * powerd telemetry is reported as true|false if the plug is on * */ - void Interface::publish_snapshot() { - std::string dontcare; - this->publish_snapshot(dontcare); - } - void Interface::publish_snapshot( std::string &retstring ) { + void Interface::publish_status( bool force ) { + std::lock_guard lock( this->publish_mutex ); // serialize publish-on-change - // assemble the telemetry into a json message - // Set a messagetype keyword to indicate what kind of message this is. + // unless forced, only publish if the power state changed // - nlohmann::json jmessage_out; - jmessage_out["source"] = "powerd"; // source of this telemetry + if ( !force && this->status == this->last_published_status ) return; - // get power status - // - this->status("", retstring); + nlohmann::json jmessage_out; + jmessage_out[Key::SOURCE] = Topic::POWERD; - // fill the jmessage_out with boolean values for the key/val pairs just retrieved - // to represent the powered state of the plug ("on" is true) + // a plug is reported true when it is on // - for ( const auto &[key,val] : this->telemetry_map ) { - jmessage_out[key]=(val==1?true:false); + for ( const auto &[key,val] : this->status.plugstate ) { + jmessage_out[key] = ( val==1 ? true : false ); } - // for backwards compatibility - jmessage_out["messagetype"]="powerinfo"; - retstring = jmessage_out.dump(); // serialize the json message into retstring - retstring.append(JEOF); // append the JSON message terminator + this->last_published_status = this->status; - // publish the jmessage - // try { this->publisher->publish( jmessage_out ); } catch( const std::exception &e ) { - logwrite( "Power::Interface::publish_snapshot", + logwrite( "Power::Interface::publish_status", "ERROR publishing message: "+std::string(e.what()) ); } } - /***** Power::Interface::publish_snapshot ***********************************/ + /***** Power::Interface::publish_status *************************************/ void Interface::handletopic_snapshot( const nlohmann::json &jmessage ) { - // If my name is in the jmessage then publish my snapshot + // If my topic is in the jmessage then publish my status // - if ( jmessage.contains( Power::DAEMON_NAME ) ) { - this->publish_snapshot(); + if ( jmessage.contains( Topic::POWERD ) ) { + this->publish_status(); } } } diff --git a/powerd/power_interface.h b/powerd/power_interface.h index b6a21874..1ffefcec 100644 --- a/powerd/power_interface.h +++ b/powerd/power_interface.h @@ -184,7 +184,19 @@ namespace Power { zmqpp::context context; bool class_initialized; size_t numdev; ///< number of NPS devices, or "units" - std::map telemetry_map; ///< map of plug status 0|1 indexed by plug nam + + /** + * @struct Status + * @brief published power state: plug power (0=off,1=on,-1=err) indexed by plug name + */ + struct Status { + std::map plugstate; + bool operator==(const Status &o) const { return plugstate == o.plugstate; } + bool operator!=(const Status &o) const { return !(*this == o); } + }; + Status status; ///< current power state + Status last_published_status; ///< last published power state + std::mutex publish_mutex; ///< serializes publish-on-change public: Interface() @@ -195,7 +207,7 @@ namespace Power { should_subscriber_thread_run(false) { topic_handlers = { - { "_snapshot", std::function( + { Topic::SNAPSHOT, std::function( [this](const nlohmann::json &msg) { handletopic_snapshot(msg); } ) } }; } @@ -249,9 +261,8 @@ namespace Power { bool isopen(); ///< is the NPS socket connection open? long command( std::string cmd, std::string &retstring ); ///< parse and form a command to send to the NPS unit void list( std::string args, std::string &retstring ); ///< list plug devices - long status( std::string args, std::string &retstring ); ///< status of all plug devices - void publish_snapshot(); ///< make serialized JSON telemetry message - void publish_snapshot( std::string &retstring ); ///< make serialized JSON telemetry message + long get_status( std::string args, std::string &retstring ); ///< status of all plug devices + void publish_status( bool force=false ); ///< publish power state on change (or force) }; /***** Power::Interface *****************************************************/ diff --git a/powerd/power_server.cpp b/powerd/power_server.cpp index 48699065..8c49ed69 100644 --- a/powerd/power_server.cpp +++ b/powerd/power_server.cpp @@ -291,7 +291,14 @@ namespace Power { auto newlogtime = next_occurrence( 12, 01, 00 ); std::this_thread::sleep_until( newlogtime ); close_log(); - init_log( logpath, DAEMON_NAME ); + // retry the re-open on a short timer so a transient failure (missing + // datedir, permission/owner drift, full disk) doesn't silence logging + // for ~24h until the next rotation + while ( init_log( logpath, DAEMON_NAME ) != 0 ) { + std::cerr << get_timestamp() << " (Power::Server::new_log_day) " + << "ERROR: log rotation failed to open new logfile; retrying in 60s\n"; + std::this_thread::sleep_for( std::chrono::seconds(60) ); + } // ensure it doesn't immediately re-open std::this_thread::sleep_for( std::chrono::seconds(1) ); } @@ -409,7 +416,7 @@ namespace Power { * Valid commands are listed in acamd_commands.h * */ - void Server::doit(Network::TcpSocket sock) { + void Server::doit(Network::TcpSocket &sock) { std::string function = "Power::Server::doit"; long ret; std::stringstream message; @@ -606,28 +613,12 @@ namespace Power { // power status // if ( cmd == POWERD_STATUS ) { - ret = this->interface.status( args, retstring ); + ret = this->interface.get_status( args, retstring ); if ( ret==NO_ERROR ) { ret=NOTHING; if ( sock.Write( retstring ) < 0 ) connection_open=false; } } - else - - // telemetry request - // - if ( cmd == SNAPSHOT || cmd == TELEMREQUEST ) { - if ( args=="?" || args=="help" ) { - retstring=TELEMREQUEST+"\n"; - retstring.append( " Returns a serialized JSON message containing telemetry\n" ); - retstring.append( " information, terminated with \"EOF\\n\".\n" ); - ret=HELP; - } - else { - this->interface.publish_snapshot( retstring ); - ret = JSON; - } - } // all other commands go to the powerd interface for parsing // @@ -680,8 +671,6 @@ namespace Power { if ( sock.Write( retstring ) < 0 ) connection_open=false; } - if ( ret==NO_ERROR ) this->interface.publish_snapshot(); - if (!sock.isblocking()) break; // Non-blocking connection exits immediately. // Keep blocking connection open for interactive session. } diff --git a/powerd/power_server.h b/powerd/power_server.h index 3cd1aade..0a831558 100644 --- a/powerd/power_server.h +++ b/powerd/power_server.h @@ -98,7 +98,7 @@ namespace Power { void exit_cleanly(void); ///< exit long configure_powerd(); ///< read and apply the configuration file - void doit(Network::TcpSocket sock); ///< the workhorse of each thread connetion + void doit(Network::TcpSocket &sock); ///< the workhorse of each thread connetion void handle_signal( int signo ); diff --git a/powerd/powerd.cpp b/powerd/powerd.cpp index 3763fd7f..a4154096 100644 --- a/powerd/powerd.cpp +++ b/powerd/powerd.cpp @@ -126,8 +126,10 @@ int main(int argc, char **argv) { } std::this_thread::sleep_for( std::chrono::milliseconds(500) ); - // publish snapshot of my telemetry so the world knows I'm online - powerd.interface.publish_snapshot(); + // read current state, then force-publish so the world knows I'm online + std::string dontcare; + powerd.interface.get_status( "", dontcare ); + powerd.interface.publish_status( true ); // This will pre-thread N_THREADS threads. // The 0th thread is reserved for the blocking port, and the rest are for the non-blocking port. diff --git a/run/targetcontrol.py b/run/targetcontrol.py index 59dbcc7e..d9c901a7 100755 --- a/run/targetcontrol.py +++ b/run/targetcontrol.py @@ -171,7 +171,10 @@ def flash_background(self, dt): self.status_label_bottom.color = [1, 0, 0, 1] # Red text on black background # Send the shell command and check the response - process = subprocess.Popen("/home/developer/Software/run/tcs getmotion", shell=True, stdout=subprocess.PIPE) + # Use "poll" prefix to suppress tcsd's per-command logging — see + # memory note project-targetcontrol-revisit for the longer-term plan + # (ZMQ daemon-state awareness instead of subprocess polling). + process = subprocess.Popen("/home/developer/Software/run/tcs poll getmotion", shell=True, stdout=subprocess.PIPE) output, _ = process.communicate() output = output.decode().strip() diff --git a/sequencerd/sequence.cpp b/sequencerd/sequence.cpp index fe18ade3..3e4de188 100644 --- a/sequencerd/sequence.cpp +++ b/sequencerd/sequence.cpp @@ -33,10 +33,6 @@ namespace Sequencer { if ( jmessage_in.contains( Sequencer::DAEMON_NAME ) ) { this->publish_snapshot(); } - else - if ( jmessage_in.contains( "test" ) ) { - logwrite( "Sequencer::Sequence::handletopic_snapshot", jmessage_in.dump() ); - } } /***** Sequencer::Sequence::handletopic_snapshot ***************************/ @@ -175,6 +171,7 @@ namespace Sequencer { this->publish_seqstate(); this->publish_waitstate(); this->publish_daemonstate(); + this->publish_targetinfo( true ); } /***** Sequencer::Sequence::publish_snapshot *******************************/ @@ -365,6 +362,7 @@ namespace Sequencer { // publish the structured seqstate topic // this->publish_seqstate(); + this->publish_targetinfo(); // targetinfo content is gated on seq_state (READY/RUNNING) this->cv.notify_all(); // emit a NOTICE on Topic::BROADCAST only when the lifecycle state has @@ -628,6 +626,12 @@ namespace Sequencer { if ( targetstate == TargetInfo::TARGET_FOUND ) { // target found, get the threads going + if (this->target.nexp==0) { // skip target if nexp==0 + message.str(""); message << "skipping target " << this->target.name; + logwrite(function, message.str()); + continue; + } + // If the TCS is not ready and the target contains TCS coordinates, // then we cannot proceed. // @@ -646,6 +650,8 @@ namespace Sequencer { this->thread_error_manager.set( THR_SEQUENCE_START ); // report any error break; } + + this->publish_targetinfo(); // publish the now-active target } else // targetstate not TARGET_FOUND if ( targetstate == TargetInfo::TARGET_NOT_FOUND ) { // no target found is an automatic stop @@ -834,9 +840,9 @@ namespace Sequencer { if (!this->is_science_frame_transfer) { logwrite( function, "waiting for readout" ); std::unique_lock lock(this->camerad_mtx); - while ( !this->camerad_cv.wait_for( lock, std::chrono::seconds(15), + while ( !this->camerad_cv.wait_for( lock, std::chrono::seconds(30), [this]() { return this->can_expose.load() || this->cancel_flag.load(); } ) ) { - logwrite( function, "timeout waiting for readout — requesting snapshot" ); + logwrite( function, "waiting for readout — requesting snapshot" ); lock.unlock(); this->request_snapshot(); lock.lock(); @@ -2187,7 +2193,12 @@ namespace Sequencer { } // Send casangle using tcsd wrapper for RINGGO command - // do not wait for reply + // do not wait for reply — intentional: Cassegrain rotation can take tens of + // seconds and the sequence continues while the operator guides on-target. + // + // WATCH: tcsd still sends a CID-tagged reply that accumulates in the socket + // receive buffer unread. DaemonClient::send() drains stale data before each + // new write, which prevents that orphaned reply from poisoning the next send. // { std::stringstream ringgo_cmd; @@ -2705,7 +2716,9 @@ namespace Sequencer { this->arm_readout_flag = true; // enables the async_listener to look for the readout and clear the EXPOSE bit - logwrite( function, "[DEBUG] sending expose command" ); + this->set_imgtype(); + + logwrite( function, "sending expose command" ); // Send the EXPOSE command to camera daemon and wait for the reply. // Also verify the reply contains "DONE": command_timeout returns NO_ERROR @@ -2786,6 +2799,33 @@ namespace Sequencer { /***** Sequencer::Sequence::modify_exptime **********************************/ + /***** Sequencer::Sequence::set_imgtype *************************************/ + /** + * @brief set IMGTYPE FITS keyword in camerad before each exposure + * @details Looks up the imgtype field from the CalibrationTarget config for + * the current target and sends it to camerad as a key command. + * @return ERROR|NO_ERROR + * + */ + long Sequence::set_imgtype() { + const std::string function("Sequencer::Sequence::set_imgtype"); + std::string reply; + + const std::string calname = std::string(this->target.iscal ? this->target.name : "SCIENCE"); + const std::string imgtype = this->caltarget.get_info(calname).imgtype; + + const std::string cmd = CAMERAD_KEY + " IMGTYPE=" + imgtype + + (this->target.iscal ? "//Calibration" : ""); + if ( this->camerad.send( cmd, reply ) != NO_ERROR ) { + logwrite( function, "ERROR sending '"+cmd+"': "+reply ); + return ERROR; + } + + return NO_ERROR; + } + /***** Sequencer::Sequence::set_imgtype *************************************/ + + /***** Sequencer::Sequence::startup *****************************************/ /** * @brief performs nightly startup @@ -3016,7 +3056,7 @@ namespace Sequencer { */ long Sequence::shutdown() { const std::string function("Sequencer::Sequence::shutdown"); - long error=ERROR; + long error=NO_ERROR; // Reject if a conflicting lifecycle transition is already in progress. // All other states (READY, NOTREADY, FAILED, RUNNING, PAUSED) are valid @@ -3071,6 +3111,7 @@ namespace Sequencer { // container of shutdown threads to launch, // pair their ThreadStatusBit with the function to call + // (TCS is shut down after these complete; see below) // std::vector>> worker_threads = { { THR_ACAM_SHUTDOWN, std::bind(&Sequence::acam_shutdown, this) }, @@ -3078,9 +3119,8 @@ namespace Sequencer { { THR_CAMERA_SHUTDOWN, std::bind(&Sequence::camera_shutdown, this) }, { THR_FLEXURE_SHUTDOWN, std::bind(&Sequence::flexure_shutdown, this) }, { THR_FOCUS_SHUTDOWN, std::bind(&Sequence::focus_shutdown, this) }, - { THR_SLICECAM_SHUTDOWN, std::bind(&Sequence::slit_shutdown, this) }, - { THR_SLIT_SHUTDOWN, std::bind(&Sequence::slicecam_shutdown, this) }, - { THR_TCS_SHUTDOWN, std::bind(&Sequence::tcs_shutdown, this) } + { THR_SLICECAM_SHUTDOWN, std::bind(&Sequence::slicecam_shutdown, this) }, + { THR_SLIT_SHUTDOWN, std::bind(&Sequence::slit_shutdown, this) } }; std::vector>> worker_futures; @@ -3109,11 +3149,28 @@ namespace Sequencer { } } + // TCS is shut down last so that any lingering guider pt_offsets from + // acamd during the slow cover-close above hit an open connection. + // + try { + if ( this->tcs_shutdown() != NO_ERROR ) { + this->broadcast.error( function, Sequencer::thread_names.at(THR_TCS_SHUTDOWN)+" failed" ); + error = ERROR; + } + else { + this->broadcast.notice( function, Sequencer::thread_names.at(THR_TCS_SHUTDOWN)+" shutdown complete" ); + } + } + catch (const std::exception& e) { + this->broadcast.error( function, Sequencer::thread_names.at(THR_TCS_SHUTDOWN)+" exception: "+std::string(e.what()) ); + error=ERROR; + } + if (error==NO_ERROR) { this->broadcast.notice(function, "instrument is shut down"); } else { - this->broadcast.error(function, "shut down may not be complete"); + this->broadcast.warning(function, "shut down may not be complete"); } // Always end in NOTREADY regardless of worker errors. SEQ_FAILED is @@ -3643,44 +3700,50 @@ namespace Sequencer { /***** Sequencer::Sequence::target_offset ***********************************/ - /***** Sequencer::Sequence::make_telemetry_message **************************/ + /***** Sequencer::Sequence::publish_targetinfo *****************************/ /** - * @brief assembles a telemetry message - * @details This creates a JSON message for my telemetry info, then serializes - * it into a std::string ready to be sent over a socket. - * @param[out] retstring string containing the serialization of the JSON message + * @brief publish target info on Topic::TARGETINFO, on change (or force) + * @details Builds a JSON message of the current target and publishes it + * only when it differs from the last published message, unless + * force is set. The message is empty unless seq state is + * READY or RUNNING. + * @param[in] force optional (default=false) publish irrespective of change * */ - void Sequence::make_telemetry_message( std::string &retstring ) { - // assemble the telemetry I want to report into a json message - // Set a messagetype keyword to indicate what kind of message this is. - // + void Sequence::publish_targetinfo( bool force ) { nlohmann::json jmessage; - jmessage["messagetype"] = "targetinfo"; + jmessage[Key::SOURCE] = Sequencer::DAEMON_NAME; - // fill telemetry message only when READY or RUNNING + // fill telemetry only when READY or RUNNING; otherwise an empty (no-target) message // if ( this->seq_state_manager.are_any_set( Sequencer::SEQ_READY, Sequencer::SEQ_RUNNING ) ) { - // Store unconfigured values as NAN. - // NAN values are not logged to the database. + // unconfigured values are stored as NAN // - jmessage["OBS_ID"] = this->target.obsid < 0 ? NAN : this->target.obsid; // OBSERVATION_ID - jmessage["NAME"] = this->target.name; // NAME - jmessage["SLITA"] = this->target.slitangle; // *OTMslitangle - jmessage["BINSPECT"] = this->target.binspect < 1 ? NAN : this->target.binspect; // *BINSPECT - jmessage["BINSPAT"] = this->target.binspat < 1 ? NAN : this->target.binspat; // *BINSPAT - jmessage["POINTMODE"] = this->target.pointmode; // *POINTMODE - jmessage["RA"] = this->target.ra_hms; // *RA - jmessage["DECL"] = this->target.dec_dms; // *DECL + jmessage[Key::TargetInfo::OBS_ID] = this->target.obsid < 0 ? NAN : this->target.obsid; + jmessage[Key::TargetInfo::NAME] = this->target.name; + jmessage[Key::TargetInfo::SLITA] = this->target.slitangle; + jmessage[Key::TargetInfo::BINSPECT] = this->target.binspect < 1 ? NAN : this->target.binspect; + jmessage[Key::TargetInfo::BINSPAT] = this->target.binspat < 1 ? NAN : this->target.binspat; + jmessage[Key::TargetInfo::POINTMODE] = this->target.pointmode; + jmessage[Key::TargetInfo::RA] = this->target.ra_hms; + jmessage[Key::TargetInfo::DECL] = this->target.dec_dms; } - retstring = jmessage.dump(); // serialize the json message into a string - - retstring.append(JEOF); // append JSON message terminator + // unless forced, only publish if the target info changed + // + std::lock_guard lock( this->publish_targetinfo_mtx ); // guard check-then-act + if ( !force && jmessage == this->last_published_targetinfo ) return; + this->last_published_targetinfo = jmessage; - return; + try { + this->publisher->publish( jmessage, Topic::TARGETINFO ); + } + catch ( const std::exception &e ) { + logwrite( "Sequencer::Sequence::publish_targetinfo", + "ERROR publishing message: "+std::string(e.what()) ); + } } - /***** Sequencer::Sequence::make_telemetry_message **************************/ + /***** Sequencer::Sequence::publish_targetinfo *****************************/ /***** Sequencer::Sequence::dothread_test_fpoffset **************************/ diff --git a/sequencerd/sequence.h b/sequencerd/sequence.h index 2e04272c..f0db3271 100644 --- a/sequencerd/sequence.h +++ b/sequencerd/sequence.h @@ -527,6 +527,8 @@ namespace Sequencer { Common::Broadcaster broadcast { this->publisher, Sequencer::DAEMON_NAME }; ///< logs and publishes a narrative message on Topic::BROADCAST std::string last_seqstate_str; ///< last seqstate string announced via broadcast_seqstate() (for change detection) + nlohmann::json last_published_targetinfo; ///< last published targetinfo (for change detection) + std::mutex publish_targetinfo_mtx; ///< guards last_published_targetinfo check-then-act uint32_t get_reqstate(); ///< get the reqstate word @@ -564,7 +566,7 @@ namespace Sequencer { long get_tcs_cass( double &cass ); long target_offset(); - void make_telemetry_message( std::string &retstring ); ///< assembles my telemetry message + void publish_targetinfo( bool force=false ); ///< publish target info on change (or force) long set_power_switch( PowerState state, const std::string which, std::chrono::seconds delay ); long check_power_switch( PowerState checkstate, const std::string which, bool &is_set ); @@ -579,6 +581,7 @@ namespace Sequencer { // These are various jobs that are done in their own threads // long trigger_exposure(); ///< trigger and wait for exposure + long set_imgtype(); ///< set IMGTYPE void abort_process(); ///< tries to abort everything void stop_exposure(); ///< stop exposure timer in progress long repeat_exposure(); ///< repeat the last exposure diff --git a/sequencerd/sequencer_interface.cpp b/sequencerd/sequencer_interface.cpp index 810feb63..a4787c2b 100644 --- a/sequencerd/sequencer_interface.cpp +++ b/sequencerd/sequencer_interface.cpp @@ -735,9 +735,10 @@ namespace Sequencer { } } - // number of exposures must be >= 1 + // number of exposures must be >= 0 + // class constructed with 1 but an intentional 0 means skip this target // - if (this->nexp <= 0) this->nexp=1; + if (this->nexp < 0) this->nexp=1; return NO_ERROR; } @@ -959,9 +960,9 @@ namespace Sequencer { auto size = Tokenize( args, tokens, " \t" ); - // there must be 19 args. see cfg file for complete description - if ( size != 19 ) { - logwrite(function, "ERROR bad config file. expected 19 but received " + // there must be 20 args. see cfg file for complete description + if ( size != 20 ) { + logwrite(function, "ERROR bad config file. expected 20 but received " +std::to_string(size)+" parameters"); return ERROR; } @@ -1000,10 +1001,13 @@ namespace Sequencer { info.domelamp[i] = on_off(tokens.at(11+i)); } - // tokens 13-19 + // tokens 13-18 -- modulator numbers are {1:6} for (size_t i=0; i<6; i++) { - info.lampmod[i] = on_off(tokens.at(13+i)); + info.lampmod[i+1] = on_off(tokens.at(13+i)); } + + // token[19] is FITS IMGTYPE + info.imgtype = tokens.at(19); } catch (const std::exception &e) { logwrite(function, "ERROR: "+std::string(e.what())); diff --git a/sequencerd/sequencer_interface.h b/sequencerd/sequencer_interface.h index f4569e7c..d2ba283b 100644 --- a/sequencerd/sequencer_interface.h +++ b/sequencerd/sequencer_interface.h @@ -139,6 +139,7 @@ namespace Sequencer { ///< struct holds all calibration parameters not in the target database typedef struct { std::string name; // calibration target name + std::string imgtype; // FITS IMGTYPE keyword for target std::map channel_active; // true=on bool caldoor; // true=open bool calcover; // true=open diff --git a/sequencerd/sequencer_server.cpp b/sequencerd/sequencer_server.cpp index 0fc7aed5..a8574d3b 100644 --- a/sequencerd/sequencer_server.cpp +++ b/sequencerd/sequencer_server.cpp @@ -90,11 +90,6 @@ namespace Sequencer { return ERROR; } -#ifdef LOGLEVEL_DEBUG - message.str(""); message << "[DEBUG] configkey " << configkey << "=" << configval; - logwrite( function, message.str() ); -#endif - // NBPORT if ( configkey == "NBPORT" ) { try { @@ -897,7 +892,14 @@ namespace Sequencer { auto newlogtime = next_occurrence( 12, 01, 00 ); std::this_thread::sleep_until( newlogtime ); close_log(); - init_log( logpath, Sequencer::DAEMON_NAME ); + // retry the re-open on a short timer so a transient failure (missing + // datedir, permission/owner drift, full disk) doesn't silence logging + // for ~24h until the next rotation + while ( init_log( logpath, Sequencer::DAEMON_NAME ) != 0 ) { + std::cerr << get_timestamp() << " (Sequencer::Server::new_log_day) " + << "ERROR: log rotation failed to open new logfile; retrying in 60s\n"; + std::this_thread::sleep_for( std::chrono::seconds(60) ); + } // ensure it doesn't immediately re-open std::this_thread::sleep_for( std::chrono::seconds(1) ); } @@ -1054,12 +1056,14 @@ namespace Sequencer { bool connection_open=true; - message.str(""); message << "thread " << sock.id << " accepted " +#ifdef LOGLEVEL_DEBUG + message.str(""); message << "[DEBUG] thread " << sock.id << " accepted " << (sock.isasync() ? "ASYNC " : "" ) << (sock.isblocking() ? "BLOCKING " : "NON-BLOCKING " ) << "connection on fd " << sock.getfd() << " port " << sock.getport(); logwrite( function, message.str() ); +#endif while ( connection_open ) { @@ -1594,23 +1598,6 @@ namespace Sequencer { if ( ret != NO_ERROR ) logwrite(function, "ERROR: unable to load config file"); else ret = this->configure_sequencer(); } - else - - // send my telemetry upon request - // - if ( cmd == TELEMREQUEST ) { - if ( args=="?" || args=="help" ) { - retstring=TELEMREQUEST+"\n"; - retstring.append( " Returns a serialized JSON message containing my telemetry\n" ); - retstring.append( " information, terminated with \"EOF\\n\".\n" ); - ret=HELP; - } - else { - this->sequence.make_telemetry_message( retstring ); - ret = JSON; - } - } - // Unknown commands generate an error // else { diff --git a/slicecamd/slicecam_interface.cpp b/slicecamd/slicecam_interface.cpp index 8ed2ea8a..313b9675 100644 --- a/slicecamd/slicecam_interface.cpp +++ b/slicecamd/slicecam_interface.cpp @@ -48,7 +48,9 @@ namespace Slicecam { // empty args returns status if (action=="status") { - retstring=this->is_fineacquire_running.load(std::memory_order_acquire)?"running":"stopped"; + const bool running = this->is_fineacquire_running.load(std::memory_order_acquire); + const bool locked = this->is_fineacquire_locked.load(std::memory_order_acquire); + retstring = running ? "running" : ( locked ? "stopped (locked)" : "stopped" ); return NO_ERROR; } else @@ -323,7 +325,14 @@ namespace Slicecam { // convergence check // if ( offset_arcsec <= this->fineacquire_state.goal_arcsec ) { - logwrite( function, "fine acquisition converged" ); + std::ostringstream oss; + oss << "fine acquisition converged: offset dRA=" << med_dra * 3600.0 + << " dDEC=" << med_ddec * 3600.0 + << " arcsec (r=" << offset_arcsec + << " arcsec, n=" << n + << " scatter=(" << sig_dra << "," << sig_ddec << ") arcsec)" + << " goal=" << this->fineacquire_state.goal_arcsec << " arcsec"; + logwrite( function, oss.str() ); this->is_fineacquire_locked.store( true, std::memory_order_release ); this->is_fineacquire_running.store( false, std::memory_order_release ); this->fineacquire_state.reset(); @@ -489,7 +498,7 @@ namespace Slicecam { snapshot_status[Topic::ACAMD]=true; } // set is_acam_guiding flag - bool acquired; + bool acquired = false; Common::extract_telemetry_value( jmessage, Key::Acamd::IS_ACQUIRED, acquired ); this->is_acam_guiding.store(acquired, std::memory_order_relaxed); @@ -624,7 +633,8 @@ namespace Slicecam { jmessage_out[Key::SOURCE] = Topic::SLICECAMD; for ( const auto &[name, cam] : this->camera.andor ) { - std::string key="TANDOR_SCAM_"+name; + const std::string &key = (name == "L") ? Key::Slicecamd::TANDOR_L + : Key::Slicecamd::TANDOR_R; jmessage_out[key] = static_cast(cam->camera_info.ccdtemp); // the database wants a float } try { @@ -639,6 +649,40 @@ namespace Slicecam { /***** Slicecam::Interface::publish_snapshot ********************************/ + /***** Slicecam::Interface::publish_temperature *****************************/ + /** + * @brief publish only the andor CCD temperatures on Topic::SLICECAMD + * @details Published on a fixed interval (see slicecamd.cpp), not on + * change, since the CCD temperature varies continuously. + * + */ + void Interface::publish_temperature() { + nlohmann::json jmessage; + jmessage[Key::SOURCE] = Topic::SLICECAMD; + + for ( const auto &[name, cam] : this->camera.andor ) { + const std::string &key = (name == "L") ? Key::Slicecamd::TANDOR_L + : Key::Slicecamd::TANDOR_R; + if ( cam->is_open() ) { + int ccdtemp=99; + cam->get_temperature(ccdtemp); + jmessage[key] = static_cast(ccdtemp); // the database wants a float + } + else { + jmessage[key] = NAN; + } + } + try { + this->publisher->publish( jmessage, Topic::SLICECAMD ); + } + catch ( const std::exception &e ) { + logwrite( "Slicecam::Interface::publish_temperature", + "ERROR publishing message: "+std::string(e.what()) ); + } + } + /***** Slicecam::Interface::publish_temperature *****************************/ + + /***** Slicecam::Interface::request_snapshot ********************************/ /** * @brief sends request for snapshot diff --git a/slicecamd/slicecam_interface.h b/slicecamd/slicecam_interface.h index 0785aa84..c6a3d21a 100644 --- a/slicecamd/slicecam_interface.h +++ b/slicecamd/slicecam_interface.h @@ -263,6 +263,7 @@ namespace Slicecam { void handletopic_tcsd( const nlohmann::json &jmessage ); void publish_status(bool force=false); void publish_snapshot(); + void publish_temperature(); ///< publish only the andor temperatures on Topic::SLICECAMD (periodic) void request_snapshot(); bool wait_for_snapshots(); diff --git a/slicecamd/slicecam_server.cpp b/slicecamd/slicecam_server.cpp index d3b35464..b80e8fad 100644 --- a/slicecamd/slicecam_server.cpp +++ b/slicecamd/slicecam_server.cpp @@ -230,7 +230,14 @@ namespace Slicecam { auto newlogtime = next_occurrence( 12, 01, 00 ); std::this_thread::sleep_until( newlogtime ); close_log(); - init_log( logpath, Slicecam::DAEMON_NAME ); + // retry the re-open on a short timer so a transient failure (missing + // datedir, permission/owner drift, full disk) doesn't silence logging + // for ~24h until the next rotation + while ( init_log( logpath, Slicecam::DAEMON_NAME ) != 0 ) { + std::cerr << get_timestamp() << " (Slicecam::Server::new_log_day) " + << "ERROR: log rotation failed to open new logfile; retrying in 60s\n"; + std::this_thread::sleep_for( std::chrono::seconds(60) ); + } // ensure it doesn't immediately re-open std::this_thread::sleep_for( std::chrono::seconds(1) ); } @@ -346,7 +353,7 @@ namespace Slicecam { * Valid commands are listed in slicecamd_commands.h * */ - void Server::doit( Network::TcpSocket sock ) { + void Server::doit( Network::TcpSocket &sock ) { std::string function = "Slicecam::Server::doit"; long ret; std::stringstream message; diff --git a/slicecamd/slicecam_server.h b/slicecamd/slicecam_server.h index 5123d220..94e77567 100644 --- a/slicecamd/slicecam_server.h +++ b/slicecamd/slicecam_server.h @@ -107,7 +107,7 @@ namespace Slicecam { static void thread_main( Slicecam::Server &slicecam, Network::TcpSocket sock ); ///< main function for all non-blocked threads static void async_main( Slicecam::Server &slicecam, Network::UdpSocket sock ); ///< asynchronous message sending thread - void doit(Network::TcpSocket sock); ///< the workhorse of each thread connetion + void doit(Network::TcpSocket &sock); ///< the workhorse of each thread connetion void exit_cleanly(); ///< exit long configure_slicecamd(); ///< read and apply the configuration file diff --git a/slicecamd/slicecamd.cpp b/slicecamd/slicecamd.cpp index 82279803..e4aa0006 100644 --- a/slicecamd/slicecamd.cpp +++ b/slicecamd/slicecamd.cpp @@ -153,9 +153,22 @@ int main(int argc, char **argv) { slicecamd.exit_cleanly(); } - std::this_thread::sleep_for( std::chrono::milliseconds(100) ); + std::this_thread::sleep_for( std::chrono::milliseconds(250) ); + slicecamd.interface.publish_snapshot(); + + std::this_thread::sleep_for( std::chrono::milliseconds(250) ); slicecamd.interface.request_snapshot(); + // publish the andor CCD temperatures on a fixed 60-second interval + // (temperature varies continuously, so it is not published on change) + // + std::thread( []( Slicecam::Interface &iface ) { + while ( true ) { + iface.publish_temperature(); + std::this_thread::sleep_for( std::chrono::seconds(60) ); + } + }, std::ref(slicecamd.interface) ).detach(); + // This will pre-thread N_THREADS threads. // The 0th thread is reserved for the blocking port, and the rest are for the non-blocking port. // Each thread gets a socket object. All of the socket objects are stored in a vector container. diff --git a/slitd/slit_interface.cpp b/slitd/slit_interface.cpp index 96edc275..ec920d1c 100644 --- a/slitd/slit_interface.cpp +++ b/slitd/slit_interface.cpp @@ -218,9 +218,23 @@ namespace Slit { return ERROR; } - // All the work is done by the PI motor interface class + // The work is done by the PI motor interface class, which blocks until + // homing is complete. + // + long error = this->motorinterface.home( arg, retstring ); + + // Homing changes the home state and the actuator positions. Refresh the + // home state (as open() does) and read back the positions with get(), + // which refreshes width/offset/posA/posB and publishes the new status. // - return this->motorinterface.home( arg, retstring ); + if ( error == NO_ERROR ) { + std::string homestate; + this->is_home( "", homestate ); + status.ishome = ( homestate=="true" ? true : false ); + error = this->get( retstring ); + } + + return error; } /***** Slit::Interface::home ************************************************/ @@ -723,6 +737,7 @@ namespace Slit { * */ void Interface::publish_status(bool force) { + std::lock_guard lock( this->publish_mutex ); // serialize publish-on-change // unless forced, only publish if there was a change if ( !force && this->status == this->last_published_status ) return; diff --git a/slitd/slit_interface.h b/slitd/slit_interface.h index 235c2455..56ff1257 100644 --- a/slitd/slit_interface.h +++ b/slitd/slit_interface.h @@ -226,6 +226,7 @@ namespace Slit { Status status; Status last_published_status; + std::mutex publish_mutex; ///< serializes publish-on-change Common::Queue async; diff --git a/slitd/slit_server.cpp b/slitd/slit_server.cpp index 489ab199..412b34c6 100644 --- a/slitd/slit_server.cpp +++ b/slitd/slit_server.cpp @@ -288,7 +288,14 @@ namespace Slit { auto newlogtime = next_occurrence( 12, 01, 00 ); std::this_thread::sleep_until( newlogtime ); close_log(); - init_log( logpath, Slit::DAEMON_NAME ); + // retry the re-open on a short timer so a transient failure (missing + // datedir, permission/owner drift, full disk) doesn't silence logging + // for ~24h until the next rotation + while ( init_log( logpath, Slit::DAEMON_NAME ) != 0 ) { + std::cerr << get_timestamp() << " (Slit::Server::new_log_day) " + << "ERROR: log rotation failed to open new logfile; retrying in 60s\n"; + std::this_thread::sleep_for( std::chrono::seconds(60) ); + } // ensure it doesn't immediately re-open std::this_thread::sleep_for( std::chrono::seconds(1) ); } @@ -407,7 +414,7 @@ namespace Slit { * Valid commands are listed in acamd_commands.h * */ - void Server::doit(Network::TcpSocket sock) { + void Server::doit(Network::TcpSocket &sock) { std::string function = "Slit::Server::doit"; long ret; std::stringstream message; diff --git a/slitd/slit_server.h b/slitd/slit_server.h index 939f7124..295c46f9 100644 --- a/slitd/slit_server.h +++ b/slitd/slit_server.h @@ -97,7 +97,7 @@ namespace Slit { void exit_cleanly(void); ///< exit long configure_slitd(); ///< read and apply the configuration file - void doit(Network::TcpSocket sock); ///< the workhorse of each thread connetion + void doit(Network::TcpSocket &sock); ///< the workhorse of each thread connetion void handle_signal( int signo ); diff --git a/slitd/slitd.cpp b/slitd/slitd.cpp index 4fd6a1fb..9b215537 100644 --- a/slitd/slitd.cpp +++ b/slitd/slitd.cpp @@ -126,7 +126,7 @@ int main(int argc, char **argv) { slitd.exit_cleanly(); } - std::this_thread::sleep_for( std::chrono::milliseconds(100) ); + std::this_thread::sleep_for( std::chrono::milliseconds(250) ); slitd.interface.publish_status(true); // This will pre-thread N_THREADS threads. diff --git a/tcsd/CMakeLists.txt b/tcsd/CMakeLists.txt index 5412d5cb..d1008d74 100644 --- a/tcsd/CMakeLists.txt +++ b/tcsd/CMakeLists.txt @@ -10,8 +10,8 @@ set( TCSD_DIR ${PROJECT_BASE_DIR}/tcsd ) set( CMAKE_CXX_STANDARD 17 ) -add_definitions( -Wall -ansi -O1 -Wno-variadic-macros -ggdb ) -#add_definitions( -Wall -Wextra -Wconversion -Wshadow -ansi -O1 -Wno-variadic-macros -ggdb ) +add_definitions( -Wall -O1 -Wno-variadic-macros -ggdb ) +#add_definitions( -Wall -Wextra -Wconversion -Wshadow -O1 -Wno-variadic-macros -ggdb ) include_directories( ${PROJECT_BASE_DIR}/utils ) include_directories( ${PROJECT_BASE_DIR}/common ) diff --git a/tcsd/tcs_interface.cpp b/tcsd/tcs_interface.cpp index deee79b0..3e6331da 100644 --- a/tcsd/tcs_interface.cpp +++ b/tcsd/tcs_interface.cpp @@ -18,10 +18,6 @@ namespace TCS { if ( jmessage.contains( TCS::DAEMON_NAME ) ) { this->publish_snapshot(); } - else - if ( jmessage.contains( "test" ) ) { - logwrite( "TCS::Interface::handletopic_snapshot", jmessage.dump() ); - } } @@ -72,11 +68,14 @@ namespace TCS { } // broadcast motion status if it changed + { + std::lock_guard lock(this->publish_mutex); // guard check-then-act on last_published_motion if (!motion.empty() && motion != this->last_published_motion) { this->broadcast.notice("TCS::Interface::publish_snapshot", "telescope "+motion); this->last_published_motion = motion; } + } // for backwards compatibility jmessage_out["messagetype"] = "tcsinfo"; @@ -1282,6 +1281,15 @@ namespace TCS { return HELP; } + // Skip the hardware send when not connected, so callers that poll + // continuously (sequencerd, targetcontrol GUI) don't flood the log + // with ERROR on every call after a shutdown. + // + if ( ! this->tcs_info.isopen ) { + retstring = "not_connected"; + return NO_ERROR; + } + // Send the command // if ( this->send_command( "?MOTION", retstring, TCS::FAST_RESPONSE ) != NO_ERROR ) { @@ -1361,11 +1369,6 @@ namespace TCS { error = ERROR; } -#ifdef LOGLEVEL_DEBUG - message.str(""); message << "[DEBUG] requested cass angle " << angle; - logwrite( function, message.str() ); -#endif - std::stringstream cmd; cmd << "RINGGO " << std::fixed << std::setprecision(2) << angle; @@ -1636,10 +1639,7 @@ namespace TCS { std::string reply; -message.str(""); message << "DEBUG] sending cmd=" << cmd << " with type=" << (conn_type==TCS::FAST_RESPONSE?"fast":"slow") << " and to=" << to; -logwrite(function,message.str()); tcs.execute_command( cmd, reply, conn_type, to ); -logwrite(function,"[DEBUG] back from cmd="+cmd+" with reply="+reply); // Success or failure depends on what's in the TCS reply, // which depends on the command. diff --git a/tcsd/tcs_interface.h b/tcsd/tcs_interface.h index 6e73e6bd..ad0b1449 100644 --- a/tcsd/tcs_interface.h +++ b/tcsd/tcs_interface.h @@ -228,8 +228,6 @@ namespace TCS { if ( conn_type == TCS::SLOW_RESPONSE ) { // slow command, lock and return the slow command socket connection std::lock_guard lock( mtx_slow ); -// logwrite( function, "[DEBUG] slow command socket connection acquired on fd " -// +std::to_string(sock_slow->sock.getfd())+" for "+name+" at "+host+":"+std::to_string(port) ); return sock_slow; } else { @@ -237,9 +235,6 @@ namespace TCS { // fast command, take a socket connection from the pool while ( true ) { for ( auto &conn : pool ) { -// logwrite(function, "[DEBUG] checking connection: fd " + std::to_string(conn.socket->sock.getfd()) + -// ", inuse: " + std::to_string(conn.inuse) + -// ", connected: " + std::to_string(conn.socket->sock.isconnected())); if ( !conn.socket->sock.isconnected() ) { logwrite( function, "fast command socket fd "+std::to_string(conn.socket->sock.getfd()) +" not open, attempting to reconnect" ); @@ -247,14 +242,10 @@ namespace TCS { logwrite( function, "ERROR opening fast command socket connection" ); return nullptr; } -// logwrite( function, "[DEBUG] returning fast command socket connection on fd " -// +std::to_string(conn.socket->sock.getfd()) ); return conn.socket; } if ( !conn.inuse ) { conn.inuse=true; -// logwrite( function, "[DEBUG] fast command socket connection acquired on fd " -// +std::to_string(conn.socket->sock.getfd()) ); return conn.socket; } logwrite( function, "fast command socket fd "+std::to_string(conn.socket->sock.getfd())+" inuse, trying another" ); @@ -282,9 +273,6 @@ namespace TCS { * */ long execute_command( const std::string &cmd, std::string &reply, TCS::ConnectionType conn_type ) { -std::stringstream message; -message << "[DEBUG] in 3 arg version and using polltimeout=" << POLLTIMEOUT; -logwrite("TCS::TcsIO::execute_command", message.str()); return execute_command( cmd, reply, conn_type, POLLTIMEOUT ); } /***** TCS::TcsIO::execute_command **************************************/ @@ -301,33 +289,21 @@ logwrite("TCS::TcsIO::execute_command", message.str()); */ long execute_command( const std::string &cmd, std::string &reply, TCS::ConnectionType conn_type, int timeout ) { const std::string function("TCS::TcsIO::execute_command"); -std::stringstream message; -message << "[DEBUG] in 4 arg version with timeout=" << timeout; -logwrite(function,message.str()); long ret=ERROR; if ( conn_type == TCS::SLOW_RESPONSE ) { // slow command { std::lock_guard lock( mtx_slow ); - logwrite( function, "[DEBUG] slow command socket acquired on fd " - +std::to_string(sock_slow->sock.getfd())+" for "+name+" at "+host+":"+std::to_string(port) - +" timeout="+std::to_string(timeout) ); ret = sock_slow->send_command( cmd, reply, timeout ); } - logwrite( function, "[DEBUG] releasing slow command socket connection on fd " - +std::to_string(sock_slow->sock.getfd())+" for "+name+" at "+host+":"+std::to_string(port) ); if (ret!=NO_ERROR) sock_slow->reconnect(); return ret; } else { -// logwrite(function,"[DEBUG] asking for fast connection"); auto conn = this->get_connection( TCS::FAST_RESPONSE ); if (conn) { -// logwrite(function,"[DEBUG] sending fast command"); ret = conn->send_command( cmd, reply ); -// logwrite(function,"[DEBUG] fast command sent"); -// logwrite(function,"[DEBUG] returning fast connection"); return_connection( conn ); } else { @@ -354,8 +330,6 @@ logwrite(function,message.str()); for ( auto &conn : pool ) { if ( conn.socket == sock ) { conn.inuse = false; -// logwrite( function, "[DEBUG] returned socket connection to pool for fd " -// +std::to_string(conn.socket->sock.getfd()) ); cv.notify_one(); // notifies any waiting get_connections() return; } diff --git a/tcsd/tcs_server.cpp b/tcsd/tcs_server.cpp index 11ff5713..6501cf95 100644 --- a/tcsd/tcs_server.cpp +++ b/tcsd/tcs_server.cpp @@ -366,7 +366,14 @@ namespace TCS { auto newlogtime = next_occurrence( 12, 01, 00 ); std::this_thread::sleep_until( newlogtime ); close_log(); - init_log( logpath, TCS::DAEMON_NAME ); + // retry the re-open on a short timer so a transient failure (missing + // datedir, permission/owner drift, full disk) doesn't silence logging + // for ~24h until the next rotation + while ( init_log( logpath, TCS::DAEMON_NAME ) != 0 ) { + std::cerr << get_timestamp() << " (TCS::Server::new_log_day) " + << "ERROR: log rotation failed to open new logfile; retrying in 60s\n"; + std::this_thread::sleep_for( std::chrono::seconds(60) ); + } // ensure it doesn't immediately re-open std::this_thread::sleep_for( std::chrono::seconds(1) ); } @@ -623,6 +630,15 @@ void doit(TcsIO &tcs_io, const std::string &client_cmd, bool is_slow_command) { polling = true; } + // Commands that are inherently polling are silent without needing the client + // to send a "poll" prefix. Keeps the log readable when external clients + // (status displays, observer tools) poll status at ~1 Hz. + // + static const std::set auto_poll = { + TCSD_GET_MOTION, TCSD_GET_FOCUS, TCSD_GET_NAME, TCSD_ISOPEN + }; + if ( auto_poll.count(cmd) ) polling = true; + if (cmd.empty()) {sock.Write("\n"); continue;} // acknowledge empty command so client doesn't time out if (cmd_sep == std::string::npos) { // If no space was found, @@ -800,9 +816,9 @@ void doit(TcsIO &tcs_io, const std::string &client_cmd, bool is_slow_command) { ret = this->interface.native( args, retstring ); } else - if ( cmd == SNAPSHOT || cmd == TELEMREQUEST ) { + if ( cmd == SNAPSHOT ) { if ( args=="?" || args=="help" ) { - retstring=TELEMREQUEST+"\n"; + retstring=SNAPSHOT+"\n"; retstring.append( " Returns a serialized JSON message containing telemetry\n" ); retstring.append( " information, terminated with \"EOF\\n\".\n" ); ret=HELP; diff --git a/tcsd/tcs_server.h b/tcsd/tcs_server.h index aaa3aff9..5ef03831 100644 --- a/tcsd/tcs_server.h +++ b/tcsd/tcs_server.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/tcsd/tcsd_client.cpp b/tcsd/tcsd_client.cpp index 4ea8b42b..7e2f4ef7 100644 --- a/tcsd/tcsd_client.cpp +++ b/tcsd/tcsd_client.cpp @@ -316,12 +316,6 @@ // double quad = std::sqrt( std::pow(ra_d,2) + std::pow(dec_d,2) ); int to = static_cast( std::max( 5000.0, ( 5000.0 + (quad / rate) * 1000.0 * 1.5 ) ) ); - logwrite( function, "[DEBUG] ra_d="+std::to_string(ra_d)+ - " dec_d="+std::to_string(dec_d)+ - " rate="+std::to_string(rate)+ - " quad="+std::to_string(quad)+ - " to="+std::to_string(to) ); - logwrite( function, "[DEBUG] sending "+tcscmd.str()+" with timeout="+std::to_string(to)+" ms" ); if ( this->client.send( tcscmd.str(), tcsreply, to ) != NO_ERROR ) { logwrite( function, "ERROR sending guider offsets" ); return ERROR; @@ -556,11 +550,6 @@ error = ERROR; } -#ifdef LOGLEVEL_DEBUG // this can be a little much when polling - message.str(""); message << "[DEBUG] from tcs_message \"" << tcs_message << "\" extracted value: " << value << " error=" << error; - logwrite( function, message.str() ); -#endif - return error; } /***** TcsDaemonClient::extract_value ***************************************/ @@ -582,9 +571,6 @@ std::vector tokens; if ( value == TCS_SUCCESS ) { -#ifdef LOGLEVEL_DEBUG - logwrite( function, "[DEBUG] TCS successful completion" ); -#endif return NO_ERROR; } else { diff --git a/telemd/CMakeLists.txt b/telemd/CMakeLists.txt index fbb93bc7..0d5844a7 100644 --- a/telemd/CMakeLists.txt +++ b/telemd/CMakeLists.txt @@ -10,7 +10,7 @@ set( TELEMD_DIR ${PROJECT_BASE_DIR}/telemd ) set( CMAKE_CXX_STANDARD 17 ) -add_definitions( -Wall -ansi -O1 -Wno-variadic-macros -ggdb ) +add_definitions( -Wall -O1 -Wno-variadic-macros -ggdb ) include_directories( ${PROJECT_BASE_DIR}/utils ) include_directories( ${PROJECT_BASE_DIR}/common ) diff --git a/telemd/telem_server.cpp b/telemd/telem_server.cpp index a26fb23f..0d0f76f9 100644 --- a/telemd/telem_server.cpp +++ b/telemd/telem_server.cpp @@ -148,7 +148,14 @@ namespace Telemetry { while (1) { std::this_thread::sleep_for( std::chrono::seconds( nextday ) ); close_log(); - init_log( logpath, Telemetry::DAEMON_NAME ); + // retry the re-open on a short timer so a transient failure (missing + // datedir, permission/owner drift, full disk) doesn't silence logging + // for ~24h until the next rotation + while ( init_log( logpath, Telemetry::DAEMON_NAME ) != 0 ) { + std::cerr << get_timestamp() << " (Telemetry::Server::new_log_day) " + << "ERROR: log rotation failed to open new logfile; retrying in 60s\n"; + std::this_thread::sleep_for( std::chrono::seconds(60) ); + } } } /***** new_log_day **********************************************************/ @@ -261,7 +268,7 @@ namespace Telemetry { * Valid commands are listed in telemd_commands.h * */ - void Server::doit( Network::TcpSocket sock ) { + void Server::doit( Network::TcpSocket &sock ) { std::string function = "Telemetry::Server::doit"; long ret; std::stringstream message; diff --git a/telemd/telem_server.h b/telemd/telem_server.h index 8e8b6ec4..2038fcea 100644 --- a/telemd/telem_server.h +++ b/telemd/telem_server.h @@ -95,7 +95,7 @@ namespace Telemetry { static void thread_main( Telemetry::Server &telem, Network::TcpSocket sock ); ///< main function for all non-blocked threads static void async_main( Telemetry::Server &telem, Network::UdpSocket sock ); ///< asynchronous message sending thread - void doit(Network::TcpSocket sock); ///< the workhorse of each thread connetion + void doit(Network::TcpSocket &sock); ///< the workhorse of each thread connetion void exit_cleanly(); ///< exit long configure_telemd(); ///< read and apply the configuration file }; diff --git a/thermald/CMakeLists.txt b/thermald/CMakeLists.txt index f21492b1..c4a0144d 100644 --- a/thermald/CMakeLists.txt +++ b/thermald/CMakeLists.txt @@ -10,7 +10,7 @@ set( THERMALD_DIR ${PROJECT_BASE_DIR}/thermald ) set( CMAKE_CXX_STANDARD 17 ) -add_definitions( -Wall -ansi -O1 -Wno-variadic-macros -ggdb ) +add_definitions( -Wall -O1 -Wno-deprecated-declarations -Wno-variadic-macros -ggdb ) include_directories( ${PROJECT_BASE_DIR}/utils ) include_directories( ${PROJECT_BASE_DIR}/LKS ) diff --git a/thermald/thermal_interface.cpp b/thermald/thermal_interface.cpp index 9f59a09c..a81b024c 100644 --- a/thermald/thermal_interface.cpp +++ b/thermald/thermal_interface.cpp @@ -12,180 +12,133 @@ namespace Thermal { - /***** Thermal::Interface::make_telemetry_message ***************************/ + + + /***** Thermal::Interface::publish_status **********************************/ /** - * @brief assembles a telemetry message - * @details This creates a JSON message for telemetry info, then serializes - * it into a std::string ready to be sent over a socket so that - * outside clients can ask for my telemetry. - * @param[out] retstring string containing the serialization of the JSON message + * @brief publish a thermalinfo snapshot on Topic::THERMALD + * @details Publishes the current merged telemdata (Lakeshore + Campbell + + * external) as float values, blocking NaNs. Called periodically + * from the telemetry loop and on snapshot request. * */ - void Interface::make_telemetry_message( std::string &retstring ) { - - // read the data only if the maps are empty - // - if ( this->lakeshoredata.empty() ) this->lakeshore_readall(); - if ( this->campbell.datamap.empty() ) this->campbell.read_data(); - - // assemble the telemetry into a json message - // Set a messagetype keyword to indicate what kind of message this is. - // + void Interface::publish_status() { nlohmann::json jmessage; - jmessage["messagetype"] = "thermalinfo"; + jmessage[Key::SOURCE] = Topic::THERMALD; - // Loop through the two datamaps, campbell.datamap and lakeshoredata + // copy the latest readings under lock, then build the message from the copy // - try { - - // Make a copy of telemdata which contains all the latest readings - // - auto showdata = this->telemdata; - - // If that is empty, or the arg is "force" then read all sensors now - // - if ( showdata.empty() ) { - this->get_external_telemetry(); - this->lakeshore_readall(); - this->campbell.read_data(); - showdata.merge( this->externaldata ); - showdata.merge( this->campbell.datamap ); - showdata.merge( this->lakeshoredata ); - } - - // Now loop through that map and if the value is a float then - // add it to the jmessage (this blocks NANs). - // - for ( const auto &[key,val] : showdata ) { - if ( val.getType() == mysqlx::Value::FLOAT ) { - jmessage[key] = val.get(); - } - } + std::map showdata; + { + std::lock_guard lock( this->telemdata_mtx ); + showdata = this->telemdata; + } - retstring = jmessage.dump(); // serialize the json message into retstring + for ( const auto &[key,val] : showdata ) { + if ( val.getType() == mysqlx::Value::FLOAT ) jmessage[key] = val.get(); + } - retstring.append(JEOF); // append the JSON message terminator + try { + this->publisher->publish( jmessage, Topic::THERMALD ); } catch( const std::exception &e ) { - logwrite( "Thermal::Interface::make_telemetry_message", - "ERROR assembling telemetry message: "+std::string(e.what()) ); + logwrite( "Thermal::Interface::publish_status", + "ERROR publishing message: "+std::string(e.what()) ); } - - return; } - /***** Thermal::Interface::make_telemetry_message ***************************/ + /***** Thermal::Interface::publish_status **********************************/ - /***** Thermal::Interface::get_external_telemetry ***************************/ + /***** Thermal::Interface::handletopic_snapshot ****************************/ /** - * @brief collect telemetry from another daemon - * @details This is used for any telemetry that I need to collect from - * another daemon. Send the command "sendtelem" to the daemon, which - * will respond with a JSON message. The daemon(s) to contact - * are configured with the TELEM_PROVIDER key in the config file. + * @brief respond to a snapshot request by publishing my status + * @param[in] jmessage subscribed-received JSON message * */ - void Interface::get_external_telemetry() { - - // protects externaldata from simultaneous access - // - std::lock_guard lock( this->externaldata_mtx ); + void Interface::handletopic_snapshot( const nlohmann::json &jmessage ) { + if ( jmessage.contains( Topic::THERMALD ) ) { + this->publish_status(); + } + } + /***** Thermal::Interface::handletopic_snapshot ****************************/ - // clear the external telemetry map - // any external telemetry collected here gets put into this - // map by handle_json_message() - // - this->externaldata.clear(); - // Loop through each configured telemetry provider. This requests - // their telemetry which is returned as a serialized json string - // held in retstring. - // - // handle_json_message() will parse the serialized json string. - // - std::string retstring; -/*** - for ( const auto &provider : this->telemetry_providers ) { - Common::collect_telemetry( provider, retstring ); - if ( !retstring.empty() ) handle_json_message(retstring); + /***** Thermal::Interface::process_external_data **************************/ + /** + * @brief apply DB-bound JSON keys from a pub/sub message to externaldata + * @details Iterates the binding table in common/db_column_defs.h; for each + * entry whose jkey is present and non-null in the message, stores + * the value into externaldata under the bound column name. + * @param[in] jmessage json message received via pub/sub + */ + void Interface::process_external_data( const nlohmann::json &jmessage ) { + std::lock_guard lock( this->externaldata_mtx ); + for ( const auto &entry : DbColumnDefs::Columns ) { + if ( !jmessage.contains(entry.jkey) ) continue; + if ( jmessage[entry.jkey].is_null() ) continue; + try { + this->externaldata[entry.column] = jmessage[entry.jkey].get(); + } + catch ( const nlohmann::json::type_error &e ) { + logwrite( "Thermal::Interface::process_external_data", + "ERROR key \""+std::string(entry.jkey) + +"\" not expected type: "+e.what() ); + } } -***/ - - return; } - /***** Thermal::Interface::get_external_telemetry ***************************/ + /***** Thermal::Interface::process_external_data **************************/ - /***** Thermal::Interface::handle_json_message ******************************/ + /***** Thermal::Interface::handletopic_acamd ******************************/ /** - * @brief parses incoming telemetry messages - * @details The Interface::get_external_telemetry() will receive telemetry - * from another daemon in a JSON message. Pass that message - * to this function to parse it. The process_key() function - * verifies the key before storing it in the externaldata map. - * @param[in] message_in incoming JSON message - * @return ERROR | NO_ERROR + * @brief stash the acam CCD temperature into externaldata + * @param[in] jmessage subscribed-received JSON message on Topic::ACAMD_TEMP * */ - long Interface::handle_json_message( std::string message_in ) { - const std::string function="Thermal::Interface::handle_json_message"; - std::stringstream message; + void Interface::handletopic_acamd( const nlohmann::json &jmessage ) { + this->process_external_data( jmessage ); + } + /***** Thermal::Interface::handletopic_acamd ******************************/ - try { - nlohmann::json jmessage = nlohmann::json::parse( message_in ); - std::string messagetype; - // jmessage must not contain key "error" and must contain key "messagetype" - // - if ( !jmessage.contains("error") ) { - if ( jmessage.contains("messagetype") ) { - messagetype = jmessage["messagetype"]; - } - else { - logwrite( function, "ERROR received JSON message with no messagetype" ); - return ERROR; - } - } - else { - logwrite( function, "ERROR in JSON message" ); - return ERROR; - } + /***** Thermal::Interface::handletopic_slicecamd **************************/ + /** + * @brief stash the slicecam CCD temperatures into externaldata + * @param[in] jmessage subscribed-received JSON message on Topic::SLICECAMD + * + */ + void Interface::handletopic_slicecamd( const nlohmann::json &jmessage ) { + this->process_external_data( jmessage ); + } + /***** Thermal::Interface::handletopic_slicecamd **************************/ - // no errors, so disseminate the message contents based on the message type - // - if ( messagetype == "acaminfo" ) { - this->process_key( jmessage, Key::Acamd::TANDOR ); - } - else - if ( messagetype == "slicecaminfo" ) { - this->process_key( jmessage, "TANDOR_SCAM_L" ); - this->process_key( jmessage, "TANDOR_SCAM_R" ); - } - else - if ( messagetype == "test" ) { - message.str(""); message << "received JSON test message: \"" << jmessage["test"].get() << "\""; - logwrite( function, message.str() ); - } - else { - message.str(""); message << "ERROR received unhandled JSON message type \"" << messagetype << "\""; - logwrite( function, message.str() ); - return ERROR; - } - } - catch ( const nlohmann::json::parse_error &e ) { - message.str(""); message << "ERROR json exception parsing message: " << e.what(); - logwrite( function, message.str() ); - return ERROR; + + /***** Thermal::Interface::request_snapshot *******************************/ + /** + * @brief ask subscribed daemons to re-publish their current status + * @details Publishes a SNAPSHOT request naming each daemon whose topic + * this daemon subscribes to. Each named daemon responds by + * publishing its own status, ensuring current telemetry is + * received even if the daemon published before I subscribed. + * + */ + void Interface::request_snapshot() { + nlohmann::json jmessage; + jmessage[Topic::ACAMD] = true; + jmessage[Topic::SLICECAMD] = true; + try { + this->publisher->publish( jmessage, Topic::SNAPSHOT ); } - catch ( const std::exception &e ) { - message.str(""); message << "ERROR parsing message: " << e.what(); - logwrite( function, message.str() ); - return ERROR; + catch( const std::exception &e ) { + logwrite( "Thermal::Interface::request_snapshot", + "ERROR publishing message: "+std::string(e.what()) ); } - - return NO_ERROR; } - /***** Thermal::Interface::handle_json_message ******************************/ + /***** Thermal::Interface::request_snapshot *******************************/ + + + + /***** Thermal::Interface::open_campbell ***********************************/ @@ -482,7 +435,6 @@ namespace Thermal { // If that is empty, or the arg is "force" then read all sensors now // if ( args=="force" || showdata.empty() ) { - this->get_external_telemetry(); this->lakeshore_readall(); this->campbell.read_data(); showdata.merge( this->externaldata ); diff --git a/thermald/thermal_interface.h b/thermald/thermal_interface.h index 9d3d0c8d..184c9788 100644 --- a/thermald/thermal_interface.h +++ b/thermald/thermal_interface.h @@ -9,6 +9,7 @@ #pragma once #include "message_keys.h" +#include "db_column_defs.h" #include "network.h" #include "logentry.h" #include "common.h" @@ -110,15 +111,58 @@ namespace Thermal { * */ class Interface { + private: + zmqpp::context context; + public: + Interface() + : context(), + is_subscriber_thread_running(false), + should_subscriber_thread_run(false) + { + topic_handlers = { + { Topic::SNAPSHOT, std::function( + [this](const nlohmann::json &msg) { handletopic_snapshot(msg); } ) }, + { Topic::ACAMD_TEMP, std::function( + [this](const nlohmann::json &msg) { handletopic_acamd(msg); } ) }, + { Topic::SLICECAMD, std::function( + [this](const nlohmann::json &msg) { handletopic_slicecamd(msg); } ) } + }; + } + + std::unique_ptr publisher; ///< publisher object + std::string publisher_address; ///< publish socket endpoint + std::string publisher_topic; ///< my default topic for publishing + std::unique_ptr subscriber; ///< subscriber object + std::string subscriber_address; ///< subscribe socket endpoint + std::vector subscriber_topics; ///< list of topics I subscribe to + std::atomic is_subscriber_thread_running; ///< is my subscriber thread running? + std::atomic should_subscriber_thread_run; ///< should my subscriber thread run? + std::unordered_map> topic_handlers; + ///< maps a handler function to each topic + + long init_pubsub(const std::initializer_list &topics={}) { + if (!subscriber) { + subscriber = std::make_unique(context, Common::PubSub::Mode::SUB); + } + return Common::PubSubHandler::init_pubsub(context, *this, topics); + } + void start_subscriber_thread() { Common::PubSubHandler::start_subscriber_thread(*this); } + void stop_subscriber_thread() { Common::PubSubHandler::stop_subscriber_thread(*this); } + + void handletopic_snapshot( const nlohmann::json &jmessage ); ///< respond to a snapshot request + void handletopic_acamd( const nlohmann::json &jmessage ); ///< stash acam CCD temperature into externaldata + void handletopic_slicecamd( const nlohmann::json &jmessage ); ///< stash slicecam CCD temperatures into externaldata + void publish_status(); ///< publish thermalinfo on Topic::THERMALD + void request_snapshot(); ///< ask subscribed daemons to publish their status + Common::Queue async; std::map lakeshore; ///< STL map of all Lakeshores indexed by LKS# Thermal::Campbell campbell; ///< Campbell object for datalogger - std::map telemetry_providers; ///< map of port[daemon_name] for external telemetry providers - std::mutex lakeshoredata_mtx; std::mutex telemdata_mtx; std::mutex externaldata_mtx; @@ -130,40 +174,13 @@ namespace Thermal { std::map externaldata; ///< map of telemetry received from other daemons /** - * @brief save a key=value pair to the externaldata map - * @details The template allows the compiler to automatically deduce - * the type of the value and store it in the externaldata - * map. The mysqlx::Value element supports multiple types and - * requires correct type assignment. - * @param[in] key jmessage key - * @param[in] value any type of value - */ - template - void save_to_externaldata( const std::string &key, const T &value ) { - this->externaldata[key] = value; - } - - /** - * @brief verifies key before saving to externaldata map - * @param[in] jmessage json message - * @param[in] key key to save + * @brief apply DB-bound JSON keys from a pub/sub message to externaldata + * @details Iterates DbColumnDefs::ExternalDataColumns; for each entry whose + * jkey appears in the message with a non-null value, stores the + * value into externaldata under the bound column name. + * @param[in] jmessage json message received via pub/sub */ - template - void process_key( const nlohmann::json &jmessage, const std::string &key ) { - if ( jmessage.contains(key) ) { - if ( !jmessage[key].is_null() ) { - try { - this->save_to_externaldata( key, jmessage[key].get() ); - } - catch ( const nlohmann::json::type_error &e ) { - logwrite( "Thermal::Interface::process_key", "ERROR key \""+key+"\" not expected type: "+e.what() ); - } - } - else { - logwrite( "Thermal::Interface::process_key", "ERROR bad key \""+key+"\"" ); - } - } - } + void process_external_data( const nlohmann::json &jmessage ); /** * @typedef thermal_t @@ -179,10 +196,6 @@ namespace Thermal { std::map thermal_info; ///< thermal info database, indexed by channel label - void make_telemetry_message( std::string &retstring ); ///< assembles JSON telemetry message - void get_external_telemetry(); ///< collect telemetry from other daemons - long handle_json_message( std::string message_in ); ///< parses incoming telemetry messages - long reconnect( std::string args, std::string &retstring ); ///< close,open all hardware devices /** diff --git a/thermald/thermal_server.cpp b/thermald/thermal_server.cpp index 6876f6cb..b34175cb 100644 --- a/thermald/thermal_server.cpp +++ b/thermald/thermal_server.cpp @@ -18,6 +18,8 @@ namespace Thermal { void Server::exit_cleanly(void) { std::string function = "Thermal::Server::exit_cleanly"; + this->interface.stop_subscriber_thread(); + logwrite( function, "closing Lakeshores" ); this->interface.close_lakeshores(); this->interface.close_campbell(); @@ -37,7 +39,7 @@ namespace Thermal { */ long Server::configure_thermald() { std::string function = "Thermal::Server::configure_thermald"; - std::stringstream message; + std::ostringstream message; int applied=0; long error; @@ -108,28 +110,25 @@ namespace Thermal { applied++; } - // TELEM_PROVIDER : contains daemon name and port to contact for header telemetry info + // PUB_ENDPOINT -- my ZeroMQ socket endpoint for publishing telemetry + // SUB_ENDPOINT -- the broker endpoint I subscribe to (for snapshot requests) // - if ( config.param[entry] == "TELEM_PROVIDER" ) { - std::vector tokens; - Tokenize( config.arg[entry], tokens, " " ); - try { - if ( tokens.size() == 2 ) { - this->interface.telemetry_providers[tokens.at(0)] = std::stod(tokens.at(1)); - } - else { - message.str(""); message << "ERROR bad format TELEM_PROVIDER=\"" << config.arg[entry] << "\": expected "; - logwrite( function, message.str() ); - return ERROR; - } - } - catch ( const std::exception &e ) { - message.str(""); message << "ERROR parsing TELEM_PROVIDER from " << config.arg[entry] << ": " << e.what(); - logwrite( function, message.str() ); - return ERROR; - } - message.str(""); message << "config:" << config.param[entry] << "=" << config.arg[entry]; - this->interface.async.enqueue_and_log( "THERMALD", function, message.str() ); + // NOTE: these two keys must be present in the thermald config file for + // publishing to work. Without PUB_ENDPOINT, init_pubsub() fails and + // no telemetry is published on Topic::THERMALD. + // + if ( config.param[entry] == "PUB_ENDPOINT" ) { + this->interface.publisher_address = config.arg[entry]; + this->interface.publisher_topic = DAEMON_NAME; // default publish topic is my name + message.str(""); message << DAEMON_NAME << ":config:" << config.param[entry] << "=" << config.arg[entry]; + this->interface.async.enqueue_and_log( function, message.str() ); + applied++; + } + + if ( config.param[entry] == "SUB_ENDPOINT" ) { + this->interface.subscriber_address = config.arg[entry]; + message.str(""); message << DAEMON_NAME << ":config:" << config.param[entry] << "=" << config.arg[entry]; + this->interface.async.enqueue_and_log( function, message.str() ); applied++; } @@ -165,7 +164,7 @@ namespace Thermal { long Server::parse_lks_unit( std::string &input, int &lksnum, std::string &name, std::string &host, int &port ) { std::string function = "Thermal::Server::parse_lks_unit"; - std::stringstream message; + std::ostringstream message; std::vector tokens; Tokenize( input, tokens, " \"" ); @@ -214,7 +213,7 @@ namespace Thermal { long Server::parse_lks_chan( std::string &input, int &lksnum, std::string &chan, bool &heater, std::string &label ) { std::string function = "Thermal::Server::parse_lks_chan"; - std::stringstream message; + std::ostringstream message; std::vector tokens; Tokenize( input, tokens, " \"" ); @@ -268,7 +267,7 @@ namespace Thermal { */ long Server::parse_camp_chan( std::string &input ) { std::string function = "Thermal::Server::parse_camp_chan"; - std::stringstream message; + std::ostringstream message; std::vector tokens; int chan=-1; std::string label="undef"; @@ -320,7 +319,7 @@ namespace Thermal { */ long Server::configure_telemetry() { std::string function = "Thermal::Server::configure_telemetry"; - std::stringstream message; + std::ostringstream message; int applied=0; long error; @@ -429,7 +428,7 @@ namespace Thermal { */ long Server::configure_devices() { std::string function = "Thermal::Server::configure_devices"; - std::stringstream message; + std::ostringstream message; int applied=0; long error; @@ -548,7 +547,7 @@ namespace Thermal { */ void Server::telemetry_watchdog( Thermal::Server &server ) { std::string function = "Thermal::Server::telemetry_watchdog"; - std::stringstream message; + std::ostringstream message; logwrite( function, "telemetry watchdog active" ); @@ -575,7 +574,16 @@ namespace Thermal { */ void Server::dothread_telemetry( Thermal::Server &server ) { std::string function = "Thermal::Server::dothread_telemetry"; - std::stringstream message; + std::ostringstream message; + + // in case of exception, back off before terminating so the + // watchdog's 1Hz respawn doesn't hot-spin error messages while + // the database is unreachable + // + auto backoff = [&]() { + std::this_thread::sleep_for( std::chrono::seconds( server.telem_backoff_sec ) ); + if ( ( server.telem_backoff_sec *= 2 ) > 30 ) server.telem_backoff_sec = 30; + }; logwrite( function, "telemetry thread running" ); @@ -586,7 +594,7 @@ namespace Thermal { // while ( server.telem_running ) { - logwrite( function, "NOTICE:thermald telemetry has started" ); + logwrite( function, "thermald telemetry has started" ); try { // Creating a Database object here connects to the database @@ -596,6 +604,10 @@ namespace Thermal { // Database::Database database( server.db_info ); + // connection succeeded; reset the reconnect backoff + // + server.telem_backoff_sec = 1; + int duration=server.telem_period; if ( duration >= 60 ) { // For 1 minute or more, @@ -610,23 +622,42 @@ namespace Thermal { while ( server.telem_sleeptimer.running() ) { // Gather the data, each source stores in its own map // - server.interface.get_external_telemetry(); // collect telemetry from other daemons server.interface.lakeshore_readall(); // read all Lakeshores server.interface.campbell.read_data(); // read Campbell CR1000 - // erase the telemdata map, - // timestamp it now, then merge each source into that + // snapshot externaldata under its own lock, then copy (not move) so + // the values received asynchronously via pub/sub persist between the + // updates that populate them. (merge() would move the nodes out, + // emptying externaldata.) // + std::map extcopy; + { + std::lock_guard extlock( server.interface.externaldata_mtx ); + extcopy = server.interface.externaldata; + } + + // erase the telemdata map, timestamp it now, then merge each source + // into that. Done under lock to exclude readers (publish_status() and + // show_telemdata()) from the concurrent map mutation. + // extcopy is a throwaway local, so it is safe to merge (move) from. + // + { + std::lock_guard lock( server.interface.telemdata_mtx ); server.interface.telemdata.clear(); server.interface.telemdata["datetime"] = get_datetime(); - server.interface.telemdata.merge( server.interface.externaldata ); + server.interface.telemdata.merge( extcopy ); server.interface.telemdata.merge( server.interface.campbell.datamap ); server.interface.telemdata.merge( server.interface.lakeshoredata ); + } // insert the telemdata map to the database // database.insert( server.interface.telemdata ); + // publish the latest readings to subscribers on Topic::THERMALD + // + server.interface.publish_status(); + server.telem_sleeptimer.sleepFor( std::chrono::seconds( duration ) ); timeout( 0, "sec" ); } @@ -636,23 +667,24 @@ namespace Thermal { catch ( const mysqlx::Error &err ) { message.str(""); message << "ERROR: " << err; logwrite( function, message.str() ); + backoff(); break; } catch ( std::exception &e ) { message.str(""); message << "ERROR: " << e.what(); logwrite( function, message.str() ); + backoff(); break; } catch ( ... ) { logwrite( function, "ERROR: unknown exception." ); + backoff(); break; } - - logwrite( function, "NOTICE:thermald telemetry has stopped" ); } server.telem_running = false; - logwrite( function, "NOTICE:thermald telemetry thread terminated" ); + logwrite( function, "thermald telemetry thread terminated" ); return; } /***** Thermal::Server::dothread_telemetry *********************************/ @@ -668,7 +700,7 @@ namespace Thermal { */ long Server::telemetry( std::string args, std::string &retstring ) { std::string function = "Thermal::Server::telemetry"; - std::stringstream message; + std::ostringstream message; // "?" or no arg displays usage and possible inputs, then return // @@ -736,7 +768,14 @@ namespace Thermal { auto newlogtime = next_occurrence( 12, 01, 00 ); std::this_thread::sleep_until( newlogtime ); close_log(); - init_log( logpath, Thermal::DAEMON_NAME ); + // retry the re-open on a short timer so a transient failure (missing + // datedir, permission/owner drift, full disk) doesn't silence logging + // for ~24h until the next rotation + while ( init_log( logpath, Thermal::DAEMON_NAME ) != 0 ) { + std::cerr << get_timestamp() << " (Thermal::Server::new_log_day) " + << "ERROR: log rotation failed to open new logfile; retrying in 60s\n"; + std::this_thread::sleep_for( std::chrono::seconds(60) ); + } // ensure it doesn't immediately re-open std::this_thread::sleep_for( std::chrono::seconds(1) ); } @@ -826,7 +865,7 @@ namespace Thermal { std::string message = thermal.interface.async.dequeue(); // get the latest message from the queue (blocks) retval = sock.Send(message); // transmit the message if (retval < 0) { - std::stringstream errstm; + std::ostringstream errstm; errstm << "error sending UDP message: " << message; logwrite(function, errstm.str()); } @@ -854,10 +893,10 @@ namespace Thermal { * Valid commands are listed in acamd_commands.h * */ - void Server::doit(Network::TcpSocket sock) { + void Server::doit(Network::TcpSocket &sock) { std::string function = "Thermal::Server::doit"; long ret; - std::stringstream message; + std::ostringstream message; std::string cmd, args; // arg string is everything after command std::vector tokens; @@ -947,7 +986,7 @@ namespace Thermal { } } catch ( const std::runtime_error &e ) { - std::stringstream errstream; errstream << e.what(); + std::ostringstream errstream; errstream << e.what(); message.str(""); message << "error parsing arguments: " << errstream.str(); logwrite(function, message.str()); ret = -1; @@ -1047,22 +1086,6 @@ namespace Thermal { if ( cmd == THERMALD_SHOWTELEM ) { ret = this->interface.show_telemdata( args, retstring ); } - else - - // send telemetry upon request - // - if ( cmd == TELEMREQUEST ) { - if ( args=="?" || args=="help" ) { - retstring=TELEMREQUEST+"\n"; - retstring.append( " Returns a serialized JSON message containing telemetry\n" ); - retstring.append( " information, terminated with \"EOF\\n\".\n" ); - ret=HELP; - } - else { - this->interface.make_telemetry_message( retstring ); - ret = JSON; - } - } // unknown commands generate an error // diff --git a/thermald/thermal_server.h b/thermald/thermal_server.h index 9af43b40..1b50f6c6 100644 --- a/thermald/thermal_server.h +++ b/thermald/thermal_server.h @@ -91,6 +91,7 @@ namespace Thermal { std::vector db_info; ///< info for constructing telemetry Database object std::atomic telem_running; ///< is the main telemetry thread running? + unsigned int telem_backoff_sec=1; ///< reconnect backoff (s); doubles to a cap on telemetry DB failure, reset on success std::mutex conn_mutex; ///< mutex to protect against simultaneous access to Accept() @@ -110,7 +111,7 @@ namespace Thermal { long parse_lks_chan( std::string &input, int &lksnum, std::string &chan, bool &heater, std::string &label ); long parse_camp_chan( std::string &input ); - void doit(Network::TcpSocket sock); ///< the workhorse of each thread connetion + void doit(Network::TcpSocket &sock); ///< the workhorse of each thread connetion }; /***** Thermal::Server ******************************************************/ diff --git a/thermald/thermald.cpp b/thermald/thermald.cpp index 26fccb1f..719df6b4 100644 --- a/thermald/thermald.cpp +++ b/thermald/thermald.cpp @@ -123,6 +123,24 @@ int main(int argc, char **argv) { thermald.exit_cleanly(); } + // initialize the pub/sub handler, subscribing to the camera daemons whose + // andor CCD temperatures I fold into my telemetry + // + if ( thermald.interface.init_pubsub( { Topic::ACAMD_TEMP, + Topic::SLICECAMD } ) == ERROR ) { + logwrite(function, "ERROR initializing publisher-subscriber handler"); + thermald.exit_cleanly(); + } + + // unconditionally publish current telemetry so the world knows I'm online, + // then request a snapshot so I collect the current status of those I + // subscribe to (in case they came online before I subscribed) + // + std::this_thread::sleep_for(std::chrono::milliseconds(250)); + thermald.interface.publish_status(); + std::this_thread::sleep_for(std::chrono::milliseconds(250)); + thermald.interface.request_snapshot(); + // This will pre-thread N_THREADS threads. // The 0th thread is reserved for the blocking port, and the rest are for the non-blocking port. // Each thread gets a socket object. All of the socket objects are stored in a vector container. diff --git a/utils/logentry.cpp b/utils/logentry.cpp index 0b95650e..0405b279 100644 --- a/utils/logentry.cpp +++ b/utils/logentry.cpp @@ -153,10 +153,12 @@ long init_log( std::string logpath, std::string name, bool stderr_in ) { * */ void close_log() { + std::lock_guard lock(loglock); // lock mutex to protect from concurrent logwrite() if (filestream.is_open() == true) { std::cerr << std::flush; filestream.flush(); filestream.close(); + filestream.clear(); // drop any sticky failbit/badbit so the next open() is usable } } /***** close_log **************************************************************/ diff --git a/utils/network.cpp b/utils/network.cpp index 5b6a2402..eec4bc8b 100644 --- a/utils/network.cpp +++ b/utils/network.cpp @@ -474,7 +474,7 @@ namespace Network { asyncflag = obj.asyncflag; totime = obj.totime; id = obj.id; - fd = obj.fd; + fd = obj.fd.load(); listenfd = obj.listenfd; host = obj.host; connection_open = obj.connection_open; @@ -524,6 +524,37 @@ namespace Network { /***** Network::TcpSocket::TcpSocket ****************************************/ + /***** Network::TcpSocket::operator= ****************************************/ + /** + * @brief TcpSocket copy-assignment operator + * @param[in] obj reference to class object + * @return reference to this object + * + * Memberwise copy, matching the implicitly-generated operator that existed + * before fd became std::atomic (which deletes the implicit one). The + * only difference is fd is loaded from the source atomic. addrs is copied + * shallowly, exactly as before; assignment is only used to (re)seed a fresh + * pre-Connect client socket, where addrs is null. + * + */ + TcpSocket& TcpSocket::operator=( const TcpSocket &obj ) { + if ( this != &obj ) { + port = obj.port; + blocking = obj.blocking; + asyncflag = obj.asyncflag; + totime = obj.totime; + id = obj.id; + fd = obj.fd.load(); + listenfd = obj.listenfd; + host = obj.host; + connection_open = obj.connection_open; + addrs = obj.addrs; + } + return *this; + } + /***** Network::TcpSocket::operator= ****************************************/ + + /***** Network::TcpSocket::Accept *******************************************/ /** * @brief creates a new connected socket for pending connection @@ -745,17 +776,23 @@ namespace Network { std::stringstream message; int error = -1; - if (this->fd >= 0) { // if the file descriptor is valid - if (close(this->fd) == 0) { // then close it + // Atomically claim the fd and reset it to -1 in one step, so that if two + // threads share this socket object only one of them ever calls close() on + // the descriptor. The loser sees -1 and no-ops, which prevents a double + // close() from closing an unrelated fd that the kernel has since recycled. + // + int closefd = this->fd.exchange( -1 ); + + if (closefd >= 0) { // if the file descriptor was valid + if (close(closefd) == 0) { // then close it #ifdef LOGLEVEL_DEBUG -// message.str(""); message << "[DEBUG] connection to " << this->host << "/" << this->port << " on fd " << this->fd << " closed"; -// logwrite( function, message.str() ); + message.str(""); message << "[DEBUG] connection to " << this->host << "/" << this->port << " on fd " << closefd << " closed"; + logwrite( function, message.str() ); #endif error = 0; - this->fd = -1; } else { - message.str(""); message << "ERROR closing fd " << this->fd << " on port " << this->port + message.str(""); message << "ERROR closing fd " << closefd << " on port " << this->port << " returned " << errno << ": " << strerror(errno); logwrite( function, message.str() ); error = -1; // error closing file descriptor @@ -995,8 +1032,10 @@ namespace Network { break; } if ( nread == 0 ) { - message << "ERROR no data from socket " << this->host << "/" << this->port << " on fd " << this->fd << ": closing connection"; +#ifdef LOGLEVEL_DEBUG + message << "[DEBUG] no data from socket " << this->host << "/" << this->port << " on fd " << this->fd << ": closing connection"; logwrite( function, message.str() ); +#endif this->Close(); break; } diff --git a/utils/network.h b/utils/network.h index f2b5637a..f9973d55 100644 --- a/utils/network.h +++ b/utils/network.h @@ -19,6 +19,7 @@ #include #include #include +#include #include // for ioctl, FIONREAD #include // for pollfd @@ -68,7 +69,7 @@ namespace Network { bool blocking; bool asyncflag; int totime; ///< timeout time for poll - int fd; ///< connected socket file descriptor + std::atomic fd; ///< connected socket file descriptor (atomic: shared across threads in some daemons) int listenfd; ///< listening socket file descriptor std::string host; bool connection_open; @@ -83,6 +84,7 @@ namespace Network { TcpSocket( std::string host, uint16_t port_in, bool block_in, bool async_in, int totime_in, int id_in); ///< useful constructor for a server TcpSocket( std::string host, uint16_t port ); ///< client constructor TcpSocket(const TcpSocket &obj); ///< copy constructor + TcpSocket& operator=(const TcpSocket &obj); ///< copy assignment (explicit: atomic fd deletes the implicit one) struct addrinfo *addrs; ///< dynamically allocated linked list returned by getaddrinfo() diff --git a/utils/seqgui/panels.py b/utils/seqgui/panels.py index edcb533e..ddc36f79 100644 --- a/utils/seqgui/panels.py +++ b/utils/seqgui/panels.py @@ -377,6 +377,16 @@ def set_camerad(self, data): if not self.readout_active: self._set_idle(self.lbl_readout) + def set_camerad_online(self, online): + """ Clear indicators when camerad goes offline (defense-in-depth). + Without this, the last-published exposing/readout state would + stick on the UI after camerad stopped publishing. """ + if not online: + self.exposing_active = False + self.readout_active = False + self._set_idle(self.lbl_exposing) + self._set_idle(self.lbl_readout) + def blink_tick(self, phase): """ Drive blink phase for any active camera indicators. """ if self.exposing_active: @@ -485,6 +495,23 @@ def set_slicecamd(self, data): if not self.running_state: self.lbl_running.set_not_ready() + def set_acamd_online(self, online): + """ Clear ACAM indicators when acamd goes offline (defense-in-depth). """ + if not online: + self.acquiring_active = False + self.acam_mode_badge.set_not_ready() + self.acam_guiding_badge.set_not_ready() + self.acam_acquired_badge.set_not_ready() + self.seeing.setText("seeing: --") + + def set_slicecamd_online(self, online): + """ Clear SLICECAM indicators when slicecamd goes offline. """ + if not online: + self.locked_state = False + self.running_state = False + self.lbl_locked.set_not_ready() + self.lbl_running.set_not_ready() + def blink_tick(self, phase): """ Drive blink phase for any active acquisition badges. """ # locked is a stable "done" state -- never blink, stay steady green diff --git a/utils/seqgui/seqgui.py b/utils/seqgui/seqgui.py index dd05d4ba..4f4ac5b0 100644 --- a/utils/seqgui/seqgui.py +++ b/utils/seqgui/seqgui.py @@ -111,7 +111,7 @@ def initialize_services(self): # Connect the signals from SeqguiZmqService to the appropriate slots self.zmq_service.seqstate_changed.connect(self.state_panel.set_state) self.zmq_service.waitstate_changed.connect(self._on_waitstate) - self.zmq_service.daemonstate_changed.connect(self.subsys_panel.set_daemonstate) + self.zmq_service.daemonstate_changed.connect(self._on_daemonstate) self.zmq_service.acamd_changed.connect(self.acq_panel.set_acamd) self.zmq_service.slicecamd_changed.connect(self.acq_panel.set_slicecamd) self.zmq_service.camerad_changed.connect(self.camera_panel.set_camerad) @@ -129,6 +129,15 @@ def _on_waitstate(self, state): self.state_panel.set_tcsop_active(bool(state.get(WAIT_TCSOP, False))) self.state_panel.set_user_active(bool(state.get(WAIT_USER, False))) + def _on_daemonstate(self, state): + """ Fan out a daemonstate update; clear stale indicators on dependent + panels when their owning daemon goes DOWN, otherwise the last + published acam/slicecam/camera state would stick on the UI. """ + self.subsys_panel.set_daemonstate(state) + self.camera_panel.set_camerad_online(bool(state.get("camerad", False))) + self.acq_panel.set_acamd_online(bool(state.get("acamd", False))) + self.acq_panel.set_slicecamd_online(bool(state.get("slicecamd", False))) + def _on_connection_error(self, msg): """ Surface a ZMQ connection error as an ERROR row in the log. """ self.log.append("ERROR", "seqgui", msg)