diff --git a/netbox_config_backup/backup/processing.py b/netbox_config_backup/backup/processing.py index ccb436e..5a2fffa 100644 --- a/netbox_config_backup/backup/processing.py +++ b/netbox_config_backup/backup/processing.py @@ -65,21 +65,18 @@ def run_backup(job_id): raise e if ip: - logger.debug( - f'Trying to connect to device {backup.device} with ip {ip} for {job}' - ) + logger.debug(f'Trying to connect to device {backup.device} with ip {ip} for {job}') try: d = napalm_init(backup.device, ip) except (TimeoutError, ServiceUnavailable): job.status = JobStatusChoices.STATUS_FAILED - job.data = { - 'error': f'Timeout Connecting to {backup.device} with ip {ip}' - } + job.data = {'error': f'Timeout Connecting to {backup.device} with ip {ip}'} logger.debug(f'Timeout Connecting to {backup.device} with ip {ip}') job.save() return logger.debug(f'Connected to {backup.device} with ip {ip} for {job}') job.status = JobStatusChoices.STATUS_RUNNING + job.pid = pid job.started = timezone.now() job.save() try: @@ -118,16 +115,12 @@ def run_backup(job_id): configs = d.get_config() logger.debug(f'Committing config for {backup}') commit = backup.set_config(configs) - logger.debug( - f'Committed config for {backup} with {commit}; closing connection for {backup}' - ) + logger.debug(f'Committed config for {backup} with {commit}; closing connection for {backup}') d.close() logger.debug(f'Scheduling next backup for {backup}') frequency = timedelta( - seconds=settings.PLUGINS_CONFIG.get('netbox_config_backup', {}).get( - 'frequency', 3600 - ) + seconds=settings.PLUGINS_CONFIG.get('netbox_config_backup', {}).get('frequency', 3600) ) new = BackupJob( runner=None, @@ -140,10 +133,14 @@ def run_backup(job_id): new.full_clean() new.save() - logger.info(f'{backup}: Backup complete') + logger.info(f'{backup}: Next scheduled') + job.status = JobStatusChoices.STATUS_COMPLETED job.completed = timezone.now() + job.full_clean() job.save() + + logger.info(f'{backup}: Backup complete') remove_stale_backupjobs(job=job) else: logger.debug(f'{backup}: No IP set') diff --git a/netbox_config_backup/jobs/backup.py b/netbox_config_backup/jobs/backup.py index c72f7bd..ea931a3 100644 --- a/netbox_config_backup/jobs/backup.py +++ b/netbox_config_backup/jobs/backup.py @@ -26,7 +26,7 @@ job_frequency = settings.PLUGINS_CONFIG.get('netbox_config_backup', {}).get('frequency', 3600) -@system_job(interval=JobIntervalChoices.INTERVAL_MINUTELY * 15) +@system_job(interval=JobIntervalChoices.INTERVAL_MINUTELY * 5) class BackupRunner(JobRunner): processes = {} @@ -115,31 +115,33 @@ def schedule_jobs(cls, runner, backup=None, device=None): return scheduled_status - def run_processes(self): - if not self.running: - self.handle_main_exit(signal.SIGTERM, None) - jobs = BackupJob.objects.filter( + def get_scheduled_jobs(self): + return BackupJob.objects.filter( runner=None, status=JobStatusChoices.STATUS_SCHEDULED, scheduled__lte=timezone.now(), ) + + def run_processes(self): + if not self.running: + self.handle_main_exit(signal.SIGTERM, None) + jobs = self.get_scheduled_jobs() for job in jobs: job.runner = self.job job.status = JobStatusChoices.STATUS_PENDING - close_db() BackupJob.objects.bulk_update(jobs, ['runner', 'status']) self.job.data.update({'status': {'pending': jobs.count()}}) self.job.clean() self.job.save() + close_db() + for job in jobs: try: process = self.fork_process(job) process.join(1) - job.pid = process.pid - job.status = JobStatusChoices.STATUS_RUNNING except Exception as e: try: import sentry_sdk @@ -149,9 +151,9 @@ def run_processes(self): pass job.status = JobStatusChoices.STATUS_FAILED job.data['error'] = str(e) - + job.full_clean() + job.save() close_db() - BackupJob.objects.bulk_update(jobs, ['pid', 'status', 'data']) def run_backup(self, job_id): self.job_id = job_id @@ -166,7 +168,7 @@ def fork_process(self, job): return close_db() process = self.ctx.Process( - target=run_backup, + target=self.run_backup, args=(job.pk,), ) data = {job.backup.pk: {'process': process, 'backup': job.backup.pk, 'job': job.pk}} @@ -194,7 +196,6 @@ def handle_stuck_jobs(self): def handle_processes(self): for pk in list(self.processes.keys()): - terminated = self.job.data.get('status', {}).get('terminated', 0) completed = self.job.data.get('status', {}).get('completed', 0) process = self.processes.get(pk, {}).get('process') @@ -205,19 +206,25 @@ def handle_processes(self): process.terminate() del self.processes[pk] job = BackupJob.objects.filter(pk=job_pk).first() + job.refresh_from_db() if job and job.status not in [ JobStatusChoices.STATUS_COMPLETED, JobStatusChoices.STATUS_FAILED, JobStatusChoices.STATUS_ERRORED, ]: - self.job.data.update({'status': {'terminated': terminated}}) - job.status = JobStatusChoices.STATUS_ERRORED - if not job.data: - job.data = {} - job.data.update({'error': 'Process terminated for unknown reason'}) + logger.debug(f'Job status not completed for {backup}: {job.status}') else: - self.job.data.update({'status': {'completed': completed}}) - job.save() + job.data.update( + { + 'status': {'completed': completed}, + 'job': { + 'status': job.status, + 'pid': process.pid, + 'exitcode': process.exitcode, + }, + } + ) + job.save() self.job.save() self.job.refresh_from_db() @@ -278,6 +285,7 @@ def run(self, backup=None, device=None, *args, **kwargs): self.job.data.update({'status': {'scheduled': status}}) self.job.save() + self.run_processes() self.handle_processes() diff --git a/netbox_config_backup/management/commands/processbackup.py b/netbox_config_backup/management/commands/processbackup.py new file mode 100644 index 0000000..b229254 --- /dev/null +++ b/netbox_config_backup/management/commands/processbackup.py @@ -0,0 +1,27 @@ +from django.core.management.base import BaseCommand +from django.utils import timezone + +from netbox_config_backup.jobs.backup import BackupRunner +from netbox_config_backup.models import Backup + + +class Command(BaseCommand): + def add_arguments(self, parser): + parser.add_argument('--time', dest='time', help="time") + parser.add_argument('--device', dest='device', help="Device Name", required=True) + + def handle(self, *args, **options): + print(f'Running backup for: {options.get("device")}') + backup = Backup.objects.filter(device__name=options['device']).first() + if not backup: + backup = Backup.objects.filter(name=options['device']).first() + + if backup: + job = backup.jobs.last() + job.scheduled = timezone.now() + job.clean() + job.save() + BackupRunner.enqueue(backup=backup, immediate=True) + # run_backup(job.pk) + else: + raise Exception('Device not found') diff --git a/netbox_config_backup/management/commands/runbackup.py b/netbox_config_backup/management/commands/runbackup.py index 349a68a..b65a265 100644 --- a/netbox_config_backup/management/commands/runbackup.py +++ b/netbox_config_backup/management/commands/runbackup.py @@ -1,8 +1,9 @@ from django.core.management.base import BaseCommand +from django.utils import timezone from core.choices import JobStatusChoices from netbox_config_backup.jobs.backup import BackupRunner -from netbox_config_backup.models import Backup +from netbox_config_backup.models import Backup, BackupJob class Command(BaseCommand): @@ -21,11 +22,14 @@ def handle(self, *args, **options): backup = Backup.objects.filter(name=options['device']).first() if backup: if options.get('time') == 'now': - for job in backup.jobs.filter( - status__in=JobStatusChoices.ENQUEUED_STATE_CHOICES - ): + for job in backup.jobs.filter(status__in=JobStatusChoices.ENQUEUED_STATE_CHOICES): print(f'Clearing old jobs: {job}') job.status = JobStatusChoices.STATUS_ERRORED + job.data = ( + {'error': 'Clearing stuck job'} + if not job.data + else job.data.update({'error': 'Clearing stuck job'}) + ) job.clean() job.save() @@ -33,4 +37,11 @@ def handle(self, *args, **options): else: raise Exception('Device not found') else: + if options['time'] == 'now': + print('Setting all scheduled jobs to start immediately') + jobs = BackupJob.objects.filter(status=JobStatusChoices.STATUS_SCHEDULED) + for job in jobs: + job.scheduled = timezone.now() + BackupJob.objects.bulk_update(jobs, ['scheduled']) + self.run_backup() diff --git a/netbox_config_backup/urls.py b/netbox_config_backup/urls.py index 1ebee7f..a589966 100644 --- a/netbox_config_backup/urls.py +++ b/netbox_config_backup/urls.py @@ -8,7 +8,7 @@ path('backups//', include(get_model_urls('netbox_config_backup', 'backup'))), path('devices/', include(get_model_urls('netbox_config_backup', 'backup', detail=False))), path('devices//', include(get_model_urls('netbox_config_backup', 'backup'))), - path('devices//config/', views.DiffView.as_view(), name='backup_config'), + path('devices//config/', views.ConfigView.as_view(), name='backup_config'), path('devices//diff/', views.DiffView.as_view(), name='backup_diff'), path('devices//diff//', views.DiffView.as_view(), name='backup_diff'), path('jobs/', include(get_model_urls('netbox_config_backup', 'backupjob', detail=False))),