Skip to content

Resume/Continue failing #65

@yakirgb

Description

@yakirgb

Hi @adejanovski,
Running `cstar continue` fails with a Python error:

10:37:25 unbuffer /usr/local/bin/cstar continue 470ef4f5-d6a2-4372-a9e6-2430c30e22a9 -v 2>&1 | tee -a /var/log/cstar/cstar_RESUME_20210210-083724.log
10:37:25 Retry :  False
10:37:25 Resuming job 470ef4f5-d6a2-4372-a9e6-2430c30e22a9
10:37:25 Running  /usr/local/lib/python3.6/site-packages/cstar/resources/commands/run.sh
10:37:54 Traceback (most recent call last):
10:37:54   File "/usr/local/bin/cstar", line 8, in <module>
10:37:54     sys.exit(main())
10:37:54   File "/usr/local/lib/python3.6/site-packages/cstar/cstarcli.py", line 225, in main
10:37:54     namespace.func(namespace)
10:37:54   File "/usr/local/lib/python3.6/site-packages/cstar/cstarcli.py", line 70, in execute_continue
10:37:54     job.resume()
10:37:54   File "/usr/local/lib/python3.6/site-packages/cstar/job.py", line 332, in resume
10:37:54     self.run()
10:37:54   File "/usr/local/lib/python3.6/site-packages/cstar/job.py", line 344, in run
10:37:54     self.schedule_all_runnable_jobs()
10:37:54   File "/usr/local/lib/python3.6/site-packages/cstar/job.py", line 433, in schedule_all_runnable_jobs
10:37:54     next_host = self.state.find_next_host()
10:37:54   File "/usr/local/lib/python3.6/site-packages/cstar/state.py", line 75, in find_next_host
10:37:54     ignore_down_nodes=self.ignore_down_nodes)
10:37:54   File "/usr/local/lib/python3.6/site-packages/cstar/strategy.py", line 69, in find_next_host
10:37:54     return _strategy_mapping[strategy](remaining, endpoint_mapping, progress.running)
10:37:54   File "/usr/local/lib/python3.6/site-packages/cstar/strategy.py", line 83, in _topology_find_next_host
10:37:54     for next in endpoint_mapping[h]:
10:37:54 KeyError: Host(fqdn='1.1.1.1', ip='1.1.1.1', dc='UND', cluster='UND Cluster', rack='RAC1', is_up=True, host_id='259fc144-0a35-4e92-9cb6-09f099c911df')
12:32:46 
12:32:46 Aborted

The job.json file looks like this:

{
    "cache_directory": "/var/lib/jk/.cstar/cache",
    "command": "/usr/local/lib/python3.6/site-packages/cstar/resources/commands/run.sh",
    "creation_timestamp": 1612946274,
    "env": {
        "COMMAND": "/home/cassandra/scripts/cassandra_restart.sh \"/home/cassandra/scripts/find_expired_files.sh\""
    },
    "errors": [],
    "hosts_variables": {},
    "is_preheated": false,
    "jmx_username": null,
    "job_runner": "RemoteJobRunner",
    "key_space": null,
    "output_directory": "/var/lib/jk/.cstar/jobs/470ef4f5-d6a2-4372-a9e6-2430c30e22a9",
    "resolve_hostnames": false,
    "returned_jobs": [],
    "schema_versions": [
        "b1066b3a-c020-3e13-afde-b977369eb723"
    ],
    "sleep_after_done": null,
    "sleep_on_new_runner": 0.5,
    "ssh_identity_file": "/var/lib/jk/.ssh/id_rsa",
    "ssh_lib": "ssh2",
    "ssh_password": null,
    "ssh_username": "root",
    "state": {
        "cluster_parallel": true,
        "current_topology": [

... (long list omitted) ...
                [
                    "4.4.4.4",
                    "4.4.4.4",
                    "UND",
                    "UND Cluster",
                    "RAC1",
                    true,
                    "9544d7e9-ebdc-40fa-9532-ca37b0ed7d80"
                ],
                [
                    "3.3.3.3",
                    "3.3.3.3",
                    "UND",
                    "UND Cluster",
                    "RAC1",
                    true,
                    "4e8b75c9-e3c0-4447-bc5f-a5198a51d416"
                ]
            ],
            "failed": [
                [
                    "2.2.2.2",
                    "2.2.2.2",
                    "UND",
                    "UND Cluster",
                    "RAC1",
                    true,
                    "52660ead-5bd4-4bb5-9d27-7dc5f31b8d1a"
                ]
            ],
            "running": [
                [
                    "1.1.1.1",
                    "1.1.1.1",
                    "UND",
                    "UND Cluster",
                    "RAC1",
                    true,
                    "259fc144-0a35-4e92-9cb6-09f099c911df"
                ]
            ]
        },
        "strategy": "topology"
    },
    "status_topology_hash": [
        "1bac2bdafcc8201f0fbf2f1f023f8cba"
    ],
    "timeout": null,
    "version": 8
}

Thank you, Yakir Gibraltar

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions