-
Notifications
You must be signed in to change notification settings - Fork 5
Script for checking host and key status #797
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
PawelPlesniak
merged 3 commits into
develop
from
PawelPlesniak/CheckServerStatusAndUserKeys
Mar 16, 2026
Merged
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,389 @@ | ||
import getpass
import os
from concurrent.futures import ThreadPoolExecutor, as_completed

import paramiko
from rich import box
from rich.console import Console
from rich.live import Live
from rich.markup import escape
from rich.table import Table
|
|
||
# NP0x cluster hosts probed by this script. The list is sorted once at import time
# so that every consumer sees the hosts in the same, predictable order.
NP0X_CLUSTER_HOSTS = [
    # NP02 servers
    "np02-srv-001",
    "np02-srv-002",
    "np02-srv-003",
    "np02-srv-004",
    "np02-srv-005",
    # NP04 servers
    "np04-srv-001",
    "np04-srv-002",
    "np04-srv-003",
    "np04-srv-004",
    "np04-srv-005",
    "np04-srv-011",
    "np04-srv-012",
    "np04-srv-013",
    "np04-srv-014",
    "np04-srv-015",
    "np04-srv-016",
    "np04-srv-017",
    "np04-srv-018",
    "np04-srv-019",
    "np04-srv-021",
    "np04-srv-022",
    "np04-srv-024",
    "np04-srv-026",
    "np04-srv-028",
    "np04-srv-029",
    "np04-srv-030",
    "np04-srv-031",
]
NP0X_CLUSTER_HOSTS.sort()
|
|
||
|
|
||
class TrackingAutoAddPolicy(paramiko.MissingHostKeyPolicy):
    """
    Missing-host-key policy that records the event in a caller-supplied dictionary.

    The policy does not store the unknown key anywhere and does not raise; it only
    flags, in the shared result dictionary, that the host's key still has to be added
    to known_hosts.
    """

    def __init__(self, result_dict):
        """
        Keep a reference to the dictionary that missing_host_key() will update.

        Args:
            result_dict: Mutable mapping that receives the "ssh_key_status" and
                "ssh_key_color" entries when an unknown host key is encountered.
        """
        self.result_dict = result_dict

    def missing_host_key(self, client, hostname: str, key: paramiko.PKey) -> None:
        """
        Flag the host key as missing in the shared result dictionary.

        Args:
            client: The SSH client performing the connection (unused).
            hostname (str): Name of the host whose key is unknown (unused).
            key (paramiko.PKey): The key presented by the server (unused).
        """
        self.result_dict.update(
            {
                "ssh_key_status": "ADD KEY TO KNOWN_HOSTS",
                "ssh_key_color": "bold yellow",
            }
        )
|
|
||
|
|
||
def load_ssh_config() -> paramiko.SSHConfig:
    """
    Load and parse the user's SSH configuration from ~/.ssh/config.

    Returns:
        paramiko.SSHConfig: An SSHConfig object populated from the file, usable for
        looking up host-specific settings.

    Raises:
        FileNotFoundError: If ~/.ssh/config does not exist.
        Exception: If the file was opened but could not be parsed. The underlying
            parser error is chained as the cause.
    """
    config_path = os.path.expanduser("~/.ssh/config")

    # Open first and translate the failure, instead of checking for existence and
    # opening afterwards: this avoids a window in which the file can disappear
    # between the check and the open.
    try:
        config_file = open(config_path, encoding="utf-8")
    except FileNotFoundError:
        raise FileNotFoundError(
            f"SSH config file not found at {config_path}"
        ) from None

    with config_file:
        ssh_config = paramiko.SSHConfig()
        try:
            ssh_config.parse(config_file)
        except Exception as e:
            # Chain the original error so the root cause stays in the traceback.
            raise Exception(f"Error parsing SSH config file: {e}") from e

    return ssh_config
|
|
||
|
|
||
def _has_known_host_key(hostname: str, port: int) -> bool:
    """
    Report whether ~/.ssh/known_hosts holds a key for the given host and port.
    """
    try:
        known_keys = paramiko.HostKeys(os.path.expanduser("~/.ssh/known_hosts"))
    except OSError:
        # No readable known_hosts file: no key can be known.
        return False

    # Paramiko looks up keys for non-default ports under "[host]:port".
    entry_name = hostname if port == 22 else f"[{hostname}]:{port}"
    return known_keys.lookup(entry_name) is not None


def _apply_command_output(result: dict, cmd_output: str) -> None:
    """
    Fill "uptime", "cpu_color" and "details" in result from `lscpu && uptime -p` output.
    """
    for line in cmd_output.splitlines():
        if line.startswith("up "):
            result["uptime"] = line.removeprefix("up ")
            continue

        field, separator, value = line.partition(":")
        if not separator:
            continue
        # Compare the exact field name: a substring test would also match lines such
        # as "BIOS Vendor ID" / "BIOS Model name" and overwrite the CPU values.
        field = field.strip()
        value = value.strip()
        if field == "Vendor ID":
            if "AuthenticAMD" in value:
                result["cpu_color"] = "bold red"
            elif "GenuineIntel" in value:
                result["cpu_color"] = "bold blue"
        elif field == "Model name":
            result["details"] = value


def get_host_info(host_alias: str, ssh_config: paramiko.SSHConfig) -> dict:
    """
    Connect to a host over SSH and collect its status, key status, CPU and uptime.

    The host settings (hostname, user, port, identity file) are looked up in
    ssh_config under host_alias. On a successful connection the command
    `lscpu && uptime -p` is executed and its output is parsed. Failures of the
    connection attempt are not raised; they are reported through the returned
    dictionary.

    Args:
        host_alias (str): The alias of the host to connect to, as defined in the SSH
            configuration.
        ssh_config (paramiko.SSHConfig): The parsed SSH configuration used to look
            up host-specific settings.

    Returns:
        dict: A dictionary with the keys:
            - "alias": The host alias used for the connection.
            - "status": "UP" after a successful connection, "OFFLINE" after any
              failure ("SCANNING" is only the initial value).
            - "ssh_key_status": "Verified", "Missing", "ADD KEY TO KNOWN_HOSTS",
              "MISMATCH" or "Unknown".
            - "ssh_key_color": The Rich style used to display the key status.
            - "cpu_color": The Rich style used to display the CPU details; red for
              AMD, blue for Intel, dim white otherwise.
            - "uptime": The uptime reported by `uptime -p` without the leading
              "up ", or "Unknown" after an unexpected failure.
            - "details": The CPU model name on success, otherwise a short
              description of the failure.
    """
    client = paramiko.SSHClient()
    result = {
        "alias": host_alias,
        "status": "SCANNING",
        "ssh_key_status": "Unknown",
        "ssh_key_color": "red",
        "cpu_color": "dim white",
        "uptime": "",
        "details": "",
    }

    # Loading the system host keys is deliberately best-effort: if it fails, every
    # key is simply treated as missing by the policy set below.
    try:
        client.load_system_host_keys()
    except Exception:
        pass

    # The policy flags unknown host keys in `result` while connect() runs.
    client.set_missing_host_key_policy(TrackingAutoAddPolicy(result))

    # Resolve the connection settings for this alias; fall back to the alias itself
    # when the SSH config provides no explicit hostname.
    host_conf = ssh_config.lookup(host_alias)
    hostname = host_conf.get("hostname", host_alias)

    try:
        port = int(host_conf.get("port", 22))

        # Preliminary key status, kept if the connection fails before or during
        # authentication. It is checked against known_hosts directly because
        # SSHClient.get_host_keys() does not include the system host keys, and it
        # uses the resolved hostname/port because that is what connect() verifies.
        if _has_known_host_key(hostname, port):
            result["ssh_key_status"] = "Verified"
            result["ssh_key_color"] = "green"
        else:
            result["ssh_key_status"] = "Missing"
            result["ssh_key_color"] = "yellow"

        client.connect(
            hostname=hostname,
            # Resolve the local user lazily, and with getpass rather than
            # os.getlogin(), which fails without a controlling terminal.
            username=host_conf.get("user") or getpass.getuser(),
            port=port,
            timeout=5,
            key_filename=host_conf.get("identityfile"),
        )

        _, stdout, _ = client.exec_command("lscpu && uptime -p")
        cmd_output = stdout.read().decode().strip()

        # The connection succeeded. Unless the policy flagged the key as missing,
        # the host key matched a known key.
        result["status"] = "UP"
        if result["ssh_key_status"] != "ADD KEY TO KNOWN_HOSTS":
            result["ssh_key_status"] = "Verified"
            result["ssh_key_color"] = "green"

        _apply_command_output(result, cmd_output)

    # The server's key differs from the known one: a potential security issue.
    except paramiko.BadHostKeyException:
        result["status"] = "OFFLINE"
        result["ssh_key_status"] = "MISMATCH"
        result["ssh_key_color"] = "bold red"

    # The server answered but rejected the offered credentials.
    except paramiko.AuthenticationException:
        result["status"] = "OFFLINE"
        result["details"] = "Auth Failed (Key/Pass)"

    # Any other SSH-level failure; the message is truncated to keep the table narrow.
    except paramiko.SSHException as e:
        result["status"] = "OFFLINE"
        result["details"] = f"SSH Error: {str(e)[:20]}"

    # Anything else (for example network errors or an invalid port) is reported as
    # an offline host with unknown details.
    except Exception:
        result["status"] = "OFFLINE"
        result["ssh_key_status"] = "Unknown"
        result["ssh_key_color"] = "dim white"
        result["uptime"] = "Unknown"
        result["details"] = "Unknown"

    finally:
        client.close()

    return result
|
|
||
|
|
||
def generate_table(results_map: dict[str, dict]) -> Table:
    """
    Generate a Rich Table object to display the status of the NP0x cluster hosts.

    The table has the columns Host, User SSH Key Status, Status, CPU Model / Details
    and Uptime. One row is added per host of NP0X_CLUSTER_HOSTS that has an entry in
    results_map, in the order of NP0X_CLUSTER_HOSTS.

    Args:
        results_map (dict[str, dict]): Maps each host alias to its result dictionary
            with the keys "alias", "status", "ssh_key_status", "ssh_key_color",
            "cpu_color", "uptime" and "details".

    Returns:
        Table: A Rich Table populated with the status information of each host,
        ready to be rendered in the console.
    """
    # Summary shown in the table title.
    up_count = sum(1 for res in results_map.values() if res["status"] == "UP")
    total_hosts = len(results_map)

    table = Table(
        title=f"ProtoDUNE Cluster [bold cyan]({up_count}/{total_hosts} Online)[/]",
        box=box.ROUNDED,
    )
    table.add_column("Host", style="cyan", no_wrap=True, justify="center")
    table.add_column("User SSH Key Status", justify="center")
    table.add_column("Status", justify="center")
    table.add_column("CPU Model / Details", justify="center", style="dim white")
    table.add_column("Uptime", justify="center", style="dim white")

    for host in NP0X_CLUSTER_HOSTS:
        res = results_map.get(host)
        if res is None:
            # No result recorded for this host: nothing to display.
            continue

        # "DOWN" is treated like "OFFLINE" so that a failed host is never shown as
        # still being scanned. Every other status is displayed as in progress.
        if res["status"] == "UP":
            status_str = "[bold green]ONLINE[/]"
        elif res["status"] in ("OFFLINE", "DOWN"):
            status_str = "[bold red]OFFLINE[/]"
        else:
            status_str = "[bold yellow]SCANNING[/]"

        # Text originating from the remote host or from error messages is escaped
        # so that square brackets in it are not interpreted as Rich markup.
        key_str = f"[{res['ssh_key_color']}]{escape(res['ssh_key_status'])}[/]"
        details_str = f"[{res['cpu_color']}]{escape(res['details'])}[/]"

        table.add_row(
            res["alias"], key_str, status_str, details_str, escape(res["uptime"])
        )

    return table
|
|
||
|
|
||
def main():
    """
    Check every NP0x cluster host concurrently and show the results in a live table.

    The SSH configuration is loaded once, every host starts with a placeholder
    entry, and get_host_info is run for each host in a thread pool. The table is
    redrawn each time a host check completes.

    Returns:
        None

    Raises:
        FileNotFoundError: If the SSH configuration file does not exist (raised by
            load_ssh_config).
        Exception: If the SSH configuration file cannot be parsed (raised by
            load_ssh_config).
    """
    console = Console()
    console.print("")  # Buffer line for better aesthetics
    ssh_config = load_ssh_config()

    # Placeholder entries, replaced as the concurrent checks complete.
    results_map = {
        host: {
            "alias": host,
            "status": "WAITING",
            "ssh_key_status": "Pending",
            "ssh_key_color": "dim white",
            "cpu_color": "dim white",
            "uptime": "...",
            "details": "...",
        }
        for host in NP0X_CLUSTER_HOSTS
    }

    with Live(
        generate_table(results_map), console=console, refresh_per_second=10
    ) as live:
        with ThreadPoolExecutor(max_workers=15) as executor:
            future_to_host = {
                executor.submit(get_host_info, host, ssh_config): host
                for host in NP0X_CLUSTER_HOSTS
            }

            for future in as_completed(future_to_host):
                host_alias = future_to_host[future]
                try:
                    results_map[host_alias] = future.result()
                except Exception as exc:
                    # An unexpected error in one check must not abort the scan of
                    # the remaining hosts; report it on that host's row instead.
                    results_map[host_alias] = {
                        **results_map[host_alias],
                        "status": "OFFLINE",
                        "ssh_key_status": "Unknown",
                        "uptime": "Unknown",
                        "details": f"Error: {type(exc).__name__}",
                    }
                live.update(generate_table(results_map))

    console.print("\n[bold green]Scan Complete.[/]")


if __name__ == "__main__":
    main()
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.