Patch summary (text before the first file header is ignored by patch tools)

What this patch does:
  * adds core/exceptions.py (SyncError) and core/utils.py (FakeTempDirectory)
  * sync_datasource() job: catches SyncError only and marks the data source
    FAILED through a queryset update() instead of save()
  * folds migrations 0002-0004 into 0001_initial; drops unique=True from
    DataFile.path and adds a 64-hex-character validator to DataFile.hash
  * DataSource.sync(): refuses to run when not ready_for_sync; the fetch_*
    methods now create and return the local directory object themselves
  * DataSource.fetch_git(): runs git with check=True and raises SyncError
    when the clone fails
  * DataFile.refresh_from_disk(): change detection is based on the hash only

Review fixes folded into this version of the patch (hunk sizes unchanged):
  * fetch_local() no longer takes `path`; fetch() calls it with no arguments
  * fetch() raises NotImplementedError (NotImplemented is not callable)
  * sync() records last_synced again instead of overwriting last_updated
  * fetch_git() logs self.url rather than the argument list, which carries
    the URL that has just had credentials added to it
  * removed an f-prefix from a string without placeholders
  * the job logs through the module-level `logger`, not the root logger

NOTE(review): this file was rebuilt from a copy whose line breaks had been
collapsed. Indentation, blank context lines and the wrap point inside the
refresh_from_disk() docstring were inferred from the hunk line counts;
confirm against the target tree before applying. The "index" blob ids are
those of the original patch and do not describe the corrected post-images.

diff --git a/netbox/core/exceptions.py b/netbox/core/exceptions.py
new file mode 100644
index 000000000..8412b0378
--- /dev/null
+++ b/netbox/core/exceptions.py
@@ -0,0 +1,2 @@
+class SyncError(Exception):
+    pass
diff --git a/netbox/core/jobs.py b/netbox/core/jobs.py
index e5abe6aae..ab9cc4b5e 100644
--- a/netbox/core/jobs.py
+++ b/netbox/core/jobs.py
@@ -2,6 +2,7 @@ import logging
 
 from extras.choices import JobResultStatusChoices
 from .choices import *
+from .exceptions import SyncError
 from .models import DataSource
 
 logger = logging.getLogger(__name__)
@@ -16,9 +17,8 @@ def sync_datasource(job_result, *args, **kwargs):
     try:
         job_result.start()
         datasource.sync()
-    except Exception:
+    except SyncError as e:
         job_result.set_status(JobResultStatusChoices.STATUS_ERRORED)
         job_result.save()
-        datasource.status = DataSourceStatusChoices.FAILED
-        datasource.save()
-        logging.error(f"Error during syncing of data source {datasource}")
+        DataSource.objects.filter(pk=datasource.pk).update(status=DataSourceStatusChoices.FAILED)
+        logger.error(e)
diff --git a/netbox/core/migrations/0001_initial.py b/netbox/core/migrations/0001_initial.py
index 327c322e6..900b7393a 100644
--- a/netbox/core/migrations/0001_initial.py
+++ b/netbox/core/migrations/0001_initial.py
@@ -1,5 +1,6 @@
-# Generated by Django 4.1.5 on 2023-01-26 19:46
+# Generated by Django 4.1.5 on 2023-01-27 18:15
 
+import django.core.validators
 from django.db import migrations, models
 import django.db.models.deletion
 
@@ -16,15 +17,19 @@ class Migration(migrations.Migration):
             name='DataSource',
             fields=[
                 ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False)),
+                ('created', models.DateTimeField(auto_now_add=True, null=True)),
+                ('last_updated', models.DateTimeField(auto_now=True, null=True)),
                 ('name', models.CharField(max_length=100, unique=True)),
                 ('type', models.CharField(default='local', max_length=50)),
+                ('url', models.CharField(max_length=200)),
+                ('status', models.CharField(default='new', editable=False, max_length=50)),
                 ('enabled', models.BooleanField(default=True)),
                 ('description', models.CharField(blank=True, max_length=200)),
-                ('url', models.CharField(max_length=200)),
+                ('git_branch', models.CharField(blank=True, max_length=100)),
                 ('ignore_rules', models.TextField(blank=True)),
                 ('username', models.CharField(blank=True, max_length=100)),
                 ('password', models.CharField(blank=True, max_length=100)),
-                ('git_branch', models.CharField(blank=True, max_length=100)),
+                ('last_synced', models.DateTimeField(blank=True, editable=False, null=True)),
             ],
             options={
                 'ordering': ('name',),
@@ -34,10 +39,10 @@ class Migration(migrations.Migration):
             name='DataFile',
             fields=[
                 ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False)),
-                ('path', models.CharField(editable=False, max_length=1000, unique=True)),
+                ('path', models.CharField(editable=False, max_length=1000)),
                 ('last_updated', models.DateTimeField(editable=False)),
                 ('size', models.PositiveIntegerField(editable=False)),
-                ('hash', models.CharField(editable=False, max_length=64)),
+                ('hash', models.CharField(editable=False, max_length=64, validators=[django.core.validators.RegexValidator(message='Length must be 64 hexadecimal characters.', regex='^[0-9a-f]{64}$')])),
                 ('data', models.BinaryField()),
                 ('source', models.ForeignKey(editable=False, on_delete=django.db.models.deletion.CASCADE, related_name='datafiles', to='core.datasource')),
             ],
diff --git a/netbox/core/migrations/0002_datasource_last_synced.py b/netbox/core/migrations/0002_datasource_last_synced.py
deleted file mode 100644
index e813efaac..000000000
--- a/netbox/core/migrations/0002_datasource_last_synced.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Generated by Django 4.1.5 on 2023-01-26 20:11
-
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-
-    dependencies = [
-        ('core', '0001_initial'),
-    ]
-
-    operations = [
-        migrations.AddField(
-            model_name='datasource',
-            name='last_synced',
-            field=models.DateTimeField(blank=True, editable=False, null=True),
-        ),
-    ]
diff --git a/netbox/core/migrations/0003_datasource_created_datasource_last_updated.py b/netbox/core/migrations/0003_datasource_created_datasource_last_updated.py
deleted file mode 100644
index 7f09ac32a..000000000
--- a/netbox/core/migrations/0003_datasource_created_datasource_last_updated.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# Generated by Django 4.1.5 on 2023-01-26 20:50
-
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-
-    dependencies = [
-        ('core', '0002_datasource_last_synced'),
-    ]
-
-    operations = [
-        migrations.AddField(
-            model_name='datasource',
-            name='created',
-            field=models.DateTimeField(auto_now_add=True, null=True),
-        ),
-        migrations.AddField(
-            model_name='datasource',
-            name='last_updated',
-            field=models.DateTimeField(auto_now=True, null=True),
-        ),
-    ]
diff --git a/netbox/core/migrations/0004_datasource_status.py b/netbox/core/migrations/0004_datasource_status.py
deleted file mode 100644
index 4c5a543b7..000000000
--- a/netbox/core/migrations/0004_datasource_status.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Generated by Django 4.1.5 on 2023-01-27 14:09
-
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-
-    dependencies = [
-        ('core', '0003_datasource_created_datasource_last_updated'),
-    ]
-
-    operations = [
-        migrations.AddField(
-            model_name='datasource',
-            name='status',
-            field=models.CharField(default='new', editable=False, max_length=50),
-        ),
-    ]
diff --git a/netbox/core/models/data.py b/netbox/core/models/data.py
index 853bdfdee..e84f912a4 100644
--- a/netbox/core/models/data.py
+++ b/netbox/core/models/data.py
@@ -6,6 +6,7 @@ from fnmatch import fnmatchcase
 from urllib.parse import quote, urlunparse, urlparse
 
 from django.contrib.contenttypes.models import ContentType
+from django.core.validators import RegexValidator
 from django.db import models
 from django.urls import reverse
 from django.utils import timezone
@@ -17,6 +18,8 @@ from netbox.models import ChangeLoggedModel
 from utilities.files import sha256_hash
 from utilities.querysets import RestrictedQuerySet
 from ..choices import *
+from ..exceptions import SyncError
+from ..utils import FakeTempDirectory
 
 __all__ = (
     'DataSource',
@@ -28,7 +31,7 @@ logger = logging.getLogger('netbox.core.data')
 
 class DataSource(ChangeLoggedModel):
     """
-    A remote source from which DataFiles are synchronized.
+    A remote source, such as a git repository, from which DataFiles are synchronized.
     """
     name = models.CharField(
         max_length=100,
@@ -119,14 +122,16 @@ class DataSource(ChangeLoggedModel):
         """
         Create/update/delete child DataFiles as necessary to synchronize with the remote source.
         """
+        if not self.ready_for_sync:
+            raise SyncError("Cannot initiate sync; data source not ready/enabled")
+
         self.status = DataSourceStatusChoices.SYNCING
-        self.save()
+        DataSource.objects.filter(pk=self.pk).update(status=self.status)
 
         # Replicate source data locally (if needed)
-        temp_dir = tempfile.TemporaryDirectory()
-        self.fetch(path=temp_dir.name)
+        local_path = self.fetch()
 
-        logger.debug(f'Syncing files from source root {temp_dir.name}')
+        logger.debug(f'Syncing files from source root {local_path.name}')
         data_files = self.datafiles.all()
         known_paths = {df.path for df in data_files}
         logger.debug(f'Starting with {len(known_paths)} known files')
@@ -137,7 +142,7 @@ class DataSource(ChangeLoggedModel):
         for datafile in data_files:
 
             try:
-                if datafile.refresh_from_disk(source_root=temp_dir.name):
+                if datafile.refresh_from_disk(source_root=local_path.name):
                     updated_files.append(datafile)
             except FileNotFoundError:
                 # File no longer exists
@@ -153,26 +158,26 @@ class DataSource(ChangeLoggedModel):
         logger.debug(f"Deleted {updated_count} files")
 
         # Walk the local replication to find new files
-        new_paths = self._walk(temp_dir.name) - known_paths
+        new_paths = self._walk(local_path.name) - known_paths
 
         # Bulk create new files
         new_datafiles = []
         for path in new_paths:
             datafile = DataFile(source=self, path=path)
-            datafile.refresh_from_disk(source_root=temp_dir.name)
+            datafile.refresh_from_disk(source_root=local_path.name)
+            datafile.full_clean()
             new_datafiles.append(datafile)
-        # TODO: Record last_updated?
         created_count = len(DataFile.objects.bulk_create(new_datafiles, batch_size=100))
         logger.debug(f"Created {created_count} data files")
 
         # Update status & last_synced time
         self.status = DataSourceStatusChoices.COMPLETED
         self.last_synced = timezone.now()
-        self.save()
+        DataSource.objects.filter(pk=self.pk).update(status=self.status, last_synced=self.last_synced)
 
-        temp_dir.cleanup()
+        local_path.cleanup()
 
-    def fetch(self, path):
+    def fetch(self):
         """
         Replicate the file structure from the remote data source and return the local path.
         """
@@ -182,19 +187,23 @@ class DataSource(ChangeLoggedModel):
         except AttributeError:
-            raise NotImplemented(f"fetch() not yet supported for {self.get_type_display()} data sources")
+            raise NotImplementedError(f"fetch() not yet supported for {self.get_type_display()} data sources")
 
-        return fetch_method(path)
+        return fetch_method()
 
-    def fetch_local(self, path):
+    def fetch_local(self):
         """
         Skip fetching for local paths; return the source path directly.
         """
         logger.debug(f"Data source type is local; skipping fetch")
-        return urlparse(self.url).path
+        local_path = urlparse(self.url).path
 
-    def fetch_git(self, path):
+        return FakeTempDirectory(local_path)
+
+    def fetch_git(self):
         """
         Perform a shallow clone of the remote repository using the `git` executable.
         """
+        local_path = tempfile.TemporaryDirectory()
+
         # Add authentication credentials to URL (if specified)
         if self.username and self.password:
             url_components = list(urlparse(self.url))
@@ -208,10 +217,17 @@ class DataSource(ChangeLoggedModel):
         args = ['git', 'clone', '--depth', '1']
         if self.git_branch:
             args.extend(['--branch', self.git_branch])
-        args.extend([url, path])
+        args.extend([url, local_path.name])
 
-        logger.debug(f"Cloning git repo: {''.join(args)}")
-        result = subprocess.run(args)
+        logger.debug(f"Cloning git repo: {self.url}")
+        try:
+            subprocess.run(args, check=True, capture_output=True)
+        except subprocess.CalledProcessError as e:
+            raise SyncError(
+                f"Fetching remote data failed: {e.stderr}"
+            )
+
+        return local_path
 
     def _walk(self, root):
         """
@@ -246,7 +262,8 @@ class DataSource(ChangeLoggedModel):
 
 class DataFile(models.Model):
     """
-    A database object which represents a remote file fetched from a DataSource.
+    The database representation of a remote file fetched from a remote DataSource. DataFile instances should be created,
+    updated, or deleted only by calling DataSource.sync().
     """
     source = models.ForeignKey(
         to='core.DataSource',
@@ -256,8 +273,8 @@ class DataFile(models.Model):
     )
     path = models.CharField(
         max_length=1000,
-        unique=True,
-        editable=False
+        editable=False,
+        help_text=_("File path relative to the data source's root")
     )
     last_updated = models.DateTimeField(
         editable=False
@@ -265,10 +282,13 @@ class DataFile(models.Model):
     size = models.PositiveIntegerField(
         editable=False
     )
-    # TODO: Create a proper SHA256 field
     hash = models.CharField(
         max_length=64,
-        editable=False
+        editable=False,
+        validators=[
+            RegexValidator(regex='^[0-9a-f]{64}$', message=_("Length must be 64 hexadecimal characters."))
+        ],
+        help_text=_("SHA256 hash of the file data")
     )
     data = models.BinaryField()
 
@@ -286,27 +306,20 @@ class DataFile(models.Model):
     def __str__(self):
         return self.path
 
-    # def get_absolute_url(self):
-    #     return reverse('core:datafile', args=[self.pk])
-
     def refresh_from_disk(self, source_root):
         """
         Update instance attributes from the file on disk. Returns True if any attribute
         has changed.
         """
         file_path = os.path.join(source_root, self.path)
-
-        # Get attributes from file on disk
-        file_size = os.path.getsize(file_path)
         file_hash = sha256_hash(file_path).hexdigest()
 
         # Update instance file attributes & data
-        has_changed = file_size != self.size or file_hash != self.hash
-        if has_changed:
+        if is_modified := file_hash != self.hash:
             self.last_updated = timezone.now()
-            self.size = file_size
+            self.size = os.path.getsize(file_path)
             self.hash = file_hash
             with open(file_path, 'rb') as f:
                 self.data = f.read()
 
-        return has_changed
+        return is_modified
diff --git a/netbox/core/utils.py b/netbox/core/utils.py
new file mode 100644
index 000000000..92c1786f6
--- /dev/null
+++ b/netbox/core/utils.py
@@ -0,0 +1,14 @@
+__all__ = (
+    'FakeTempDirectory',
+)
+
+
+class FakeTempDirectory:
+    """
+    Mimic tempfile.TemporaryDirectory to represent a real local path.
+    """
+    def __init__(self, name):
+        self.name = name
+
+    def cleanup(self):
+        pass