General cleanup

jeremystretch 2023-01-27 13:38:52 -05:00
parent 77cc6e133f
commit b38b4e11a6
8 changed files with 77 additions and 102 deletions

View File

@@ -0,0 +1,2 @@
+class SyncError(Exception):
+    pass
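
For context, a dedicated exception type lets callers catch anticipated sync failures without swallowing unrelated bugs, which is exactly how the job runner below uses it. A minimal, self-contained sketch (the sync() function here is illustrative only, not part of this commit):

    class SyncError(Exception):
        pass

    def sync(url):
        # An expected, user-correctable failure mode
        if not url:
            raise SyncError("Cannot initiate sync; no URL configured")

    try:
        sync("")
    except SyncError as e:
        print(f"Sync failed: {e}")  # handled gracefully; other exception types still propagate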

View File

@@ -2,6 +2,7 @@ import logging

 from extras.choices import JobResultStatusChoices
 from .choices import *
+from .exceptions import SyncError
 from .models import DataSource

 logger = logging.getLogger(__name__)

@@ -16,9 +17,8 @@ def sync_datasource(job_result, *args, **kwargs):
     try:
         job_result.start()
         datasource.sync()
-    except Exception:
+    except SyncError as e:
         job_result.set_status(JobResultStatusChoices.STATUS_ERRORED)
         job_result.save()
-        datasource.status = DataSourceStatusChoices.FAILED
-        datasource.save()
-        logging.error(f"Error during syncing of data source {datasource}")
+        DataSource.objects.filter(pk=datasource.pk).update(status=DataSourceStatusChoices.FAILED)
+        logging.error(e)
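
Worth noting: the failure path now writes status via QuerySet.update() rather than Model.save(). A hedged sketch of the distinction, assuming an existing Django model (the import path is hypothetical, not NetBox's actual layout):

    from core.models import DataSource  # hypothetical import path

    def mark_failed(ds):
        # save() runs the full model path -- overridden save(), signals,
        # auto_now fields -- and writes *every* field of the instance.
        ds.status = 'failed'
        ds.save()

    def mark_failed_fast(ds):
        # update() emits a single SQL UPDATE for just this column and
        # bypasses save()/signals, so a half-synced in-memory instance
        # is never flushed back to the database.
        DataSource.objects.filter(pk=ds.pk).update(status='failed')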

View File

@@ -1,5 +1,6 @@
-# Generated by Django 4.1.5 on 2023-01-26 19:46
+# Generated by Django 4.1.5 on 2023-01-27 18:15

+import django.core.validators
 from django.db import migrations, models
 import django.db.models.deletion

@@ -16,15 +17,19 @@ class Migration(migrations.Migration):
             name='DataSource',
             fields=[
                 ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False)),
+                ('created', models.DateTimeField(auto_now_add=True, null=True)),
+                ('last_updated', models.DateTimeField(auto_now=True, null=True)),
                 ('name', models.CharField(max_length=100, unique=True)),
                 ('type', models.CharField(default='local', max_length=50)),
+                ('url', models.CharField(max_length=200)),
+                ('status', models.CharField(default='new', editable=False, max_length=50)),
                 ('enabled', models.BooleanField(default=True)),
                 ('description', models.CharField(blank=True, max_length=200)),
-                ('url', models.CharField(max_length=200)),
+                ('git_branch', models.CharField(blank=True, max_length=100)),
                 ('ignore_rules', models.TextField(blank=True)),
                 ('username', models.CharField(blank=True, max_length=100)),
                 ('password', models.CharField(blank=True, max_length=100)),
-                ('git_branch', models.CharField(blank=True, max_length=100)),
+                ('last_synced', models.DateTimeField(blank=True, editable=False, null=True)),
             ],
             options={
                 'ordering': ('name',),

@@ -34,10 +39,10 @@ class Migration(migrations.Migration):
             name='DataFile',
             fields=[
                 ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False)),
-                ('path', models.CharField(editable=False, max_length=1000, unique=True)),
+                ('path', models.CharField(editable=False, max_length=1000)),
                 ('last_updated', models.DateTimeField(editable=False)),
                 ('size', models.PositiveIntegerField(editable=False)),
-                ('hash', models.CharField(editable=False, max_length=64)),
+                ('hash', models.CharField(editable=False, max_length=64, validators=[django.core.validators.RegexValidator(message='Length must be 64 hexadecimal characters.', regex='^[0-9a-f]{64}$')])),
                 ('data', models.BinaryField()),
                 ('source', models.ForeignKey(editable=False, on_delete=django.db.models.deletion.CASCADE, related_name='datafiles', to='core.datasource')),
             ],
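
For reference, the new hash validator accepts exactly the lowercase hex form that hashlib.sha256().hexdigest() produces. A standalone sketch of the check:

    import hashlib
    import re

    SHA256_HEX = re.compile(r'^[0-9a-f]{64}$')

    digest = hashlib.sha256(b'example file contents').hexdigest()
    assert SHA256_HEX.match(digest)               # 64 lowercase hex characters
    assert not SHA256_HEX.match(digest.upper())   # uppercase is rejected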

View File

@@ -1,18 +0,0 @@
-# Generated by Django 4.1.5 on 2023-01-26 20:11
-
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-
-    dependencies = [
-        ('core', '0001_initial'),
-    ]
-
-    operations = [
-        migrations.AddField(
-            model_name='datasource',
-            name='last_synced',
-            field=models.DateTimeField(blank=True, editable=False, null=True),
-        ),
-    ]

View File

@@ -1,23 +0,0 @@
-# Generated by Django 4.1.5 on 2023-01-26 20:50
-
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-
-    dependencies = [
-        ('core', '0002_datasource_last_synced'),
-    ]
-
-    operations = [
-        migrations.AddField(
-            model_name='datasource',
-            name='created',
-            field=models.DateTimeField(auto_now_add=True, null=True),
-        ),
-        migrations.AddField(
-            model_name='datasource',
-            name='last_updated',
-            field=models.DateTimeField(auto_now=True, null=True),
-        ),
-    ]

View File

@@ -1,18 +0,0 @@
-# Generated by Django 4.1.5 on 2023-01-27 14:09
-
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-
-    dependencies = [
-        ('core', '0003_datasource_created_datasource_last_updated'),
-    ]
-
-    operations = [
-        migrations.AddField(
-            model_name='datasource',
-            name='status',
-            field=models.CharField(default='new', editable=False, max_length=50),
-        ),
-    ]

View File

@@ -6,6 +6,7 @@ from fnmatch import fnmatchcase
 from urllib.parse import quote, urlunparse, urlparse

 from django.contrib.contenttypes.models import ContentType
+from django.core.validators import RegexValidator
 from django.db import models
 from django.urls import reverse
 from django.utils import timezone

@@ -17,6 +18,8 @@ from netbox.models import ChangeLoggedModel
 from utilities.files import sha256_hash
 from utilities.querysets import RestrictedQuerySet
 from ..choices import *
+from ..exceptions import SyncError
+from ..utils import FakeTempDirectory

 __all__ = (
     'DataSource',
@@ -28,7 +31,7 @@ logger = logging.getLogger('netbox.core.data')

 class DataSource(ChangeLoggedModel):
     """
-    A remote source from which DataFiles are synchronized.
+    A remote source, such as a git repository, from which DataFiles are synchronized.
     """
     name = models.CharField(
         max_length=100,
@@ -119,14 +122,16 @@
         """
         Create/update/delete child DataFiles as necessary to synchronize with the remote source.
         """
+        if not self.ready_for_sync:
+            raise SyncError(f"Cannot initiate sync; data source not ready/enabled")
+
         self.status = DataSourceStatusChoices.SYNCING
-        self.save()
+        DataSource.objects.filter(pk=self.pk).update(status=self.status)

         # Replicate source data locally (if needed)
-        temp_dir = tempfile.TemporaryDirectory()
-        self.fetch(path=temp_dir.name)
+        local_path = self.fetch()

-        logger.debug(f'Syncing files from source root {temp_dir.name}')
+        logger.debug(f'Syncing files from source root {local_path.name}')
         data_files = self.datafiles.all()
         known_paths = {df.path for df in data_files}
         logger.debug(f'Starting with {len(known_paths)} known files')
@@ -137,7 +142,7 @@
         for datafile in data_files:
             try:
-                if datafile.refresh_from_disk(source_root=temp_dir.name):
+                if datafile.refresh_from_disk(source_root=local_path.name):
                     updated_files.append(datafile)
             except FileNotFoundError:
                 # File no longer exists
@@ -153,26 +158,26 @@
         logger.debug(f"Deleted {updated_count} files")

         # Walk the local replication to find new files
-        new_paths = self._walk(temp_dir.name) - known_paths
+        new_paths = self._walk(local_path.name) - known_paths

         # Bulk create new files
         new_datafiles = []
         for path in new_paths:
             datafile = DataFile(source=self, path=path)
-            datafile.refresh_from_disk(source_root=temp_dir.name)
+            datafile.refresh_from_disk(source_root=local_path.name)
+            datafile.full_clean()
             new_datafiles.append(datafile)
-        # TODO: Record last_updated?
         created_count = len(DataFile.objects.bulk_create(new_datafiles, batch_size=100))
         logger.debug(f"Created {created_count} data files")

         # Update status & last_synced time
         self.status = DataSourceStatusChoices.COMPLETED
-        self.last_synced = timezone.now()
-        self.save()
+        self.last_updated = timezone.now()
+        DataSource.objects.filter(pk=self.pk).update(status=self.status, last_updated=self.last_updated)

-        temp_dir.cleanup()
+        local_path.cleanup()

-    def fetch(self, path):
+    def fetch(self):
         """
         Replicate the file structure from the remote data source and return the local path.
         """
@@ -182,19 +187,23 @@
         except AttributeError:
             raise NotImplemented(f"fetch() not yet supported for {self.get_type_display()} data sources")
-        return fetch_method(path)
+        return fetch_method()

     def fetch_local(self, path):
         """
         Skip fetching for local paths; return the source path directly.
         """
         logger.debug(f"Data source type is local; skipping fetch")
-        return urlparse(self.url).path
+        local_path = urlparse(self.url).path
+        return FakeTempDirectory(local_path)

-    def fetch_git(self, path):
+    def fetch_git(self):
         """
         Perform a shallow clone of the remote repository using the `git` executable.
         """
+        local_path = tempfile.TemporaryDirectory()
+
         # Add authentication credentials to URL (if specified)
         if self.username and self.password:
             url_components = list(urlparse(self.url))
@@ -208,10 +217,17 @@
         args = ['git', 'clone', '--depth', '1']
         if self.git_branch:
             args.extend(['--branch', self.git_branch])
-        args.extend([url, path])
+        args.extend([url, local_path.name])
         logger.debug(f"Cloning git repo: {' '.join(args)}")
-        result = subprocess.run(args)
+        try:
+            subprocess.run(args, check=True, capture_output=True)
+        except subprocess.CalledProcessError as e:
+            raise SyncError(
+                f"Fetching remote data failed: {e.stderr}"
+            )
+
+        return local_path

     def _walk(self, root):
         """
@@ -246,7 +262,8 @@

 class DataFile(models.Model):
     """
-    A database object which represents a remote file fetched from a DataSource.
+    The database representation of a remote file fetched from a remote DataSource. DataFile instances should be created,
+    updated, or deleted only by calling DataSource.sync().
     """
     source = models.ForeignKey(
         to='core.DataSource',
@@ -256,8 +273,8 @@
     )
     path = models.CharField(
         max_length=1000,
-        unique=True,
-        editable=False
+        editable=False,
+        help_text=_("File path relative to the data source's root")
     )
     last_updated = models.DateTimeField(
         editable=False
@@ -265,10 +282,13 @@
     size = models.PositiveIntegerField(
         editable=False
     )
-    # TODO: Create a proper SHA256 field
     hash = models.CharField(
         max_length=64,
-        editable=False
+        editable=False,
+        validators=[
+            RegexValidator(regex='^[0-9a-f]{64}$', message=_("Length must be 64 hexadecimal characters."))
+        ],
+        help_text=_("SHA256 hash of the file data")
     )

     data = models.BinaryField()
@@ -286,27 +306,20 @@
     def __str__(self):
         return self.path

-    # def get_absolute_url(self):
-    #     return reverse('core:datafile', args=[self.pk])
-
     def refresh_from_disk(self, source_root):
         """
         Update instance attributes from the file on disk. Returns True if any attribute
         has changed.
         """
         file_path = os.path.join(source_root, self.path)
-
-        # Get attributes from file on disk
-        file_size = os.path.getsize(file_path)
         file_hash = sha256_hash(file_path).hexdigest()

         # Update instance file attributes & data
-        has_changed = file_size != self.size or file_hash != self.hash
-        if has_changed:
+        if is_modified := file_hash != self.hash:
             self.last_updated = timezone.now()
-            self.size = file_size
+            self.size = os.path.getsize(file_path)
             self.hash = file_hash
             with open(file_path, 'rb') as f:
                 self.data = f.read()

-        return has_changed
+        return is_modified
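
For context, refresh_from_disk() relies on utilities.files.sha256_hash returning a hash object exposing .hexdigest(). A hedged sketch of a compatible helper (the chunked-read implementation is an assumption, not NetBox's actual code):

    import hashlib

    def sha256_hash(file_path):
        # Hash the file in chunks so large files never load fully into memory
        sha256 = hashlib.sha256()
        with open(file_path, 'rb') as f:
            for chunk in iter(lambda: f.read(8192), b''):
                sha256.update(chunk)
        return sha256  # caller invokes .hexdigest() on the result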

netbox/core/utils.py (new file)
View File

@@ -0,0 +1,14 @@
+__all__ = (
+    'FakeTempDirectory',
+)
+
+
+class FakeTempDirectory:
+    """
+    Mimic tempfile.TemporaryDirectory to represent a real local path.
+    """
+    def __init__(self, name):
+        self.name = name
+
+    def cleanup(self):
+        pass
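
The point of this class: fetch_local() and fetch_git() can now both return an object exposing .name and .cleanup(), so sync() handles every source type identically; cleanup() is a no-op for a real local directory but deletes the clone for git sources. A minimal sketch of the shared contract (the process() function is illustrative only):

    import tempfile

    class FakeTempDirectory:
        """Mimic tempfile.TemporaryDirectory for a real, persistent path."""
        def __init__(self, name):
            self.name = name

        def cleanup(self):
            pass  # never delete a user's real directory

    def process(directory):
        print(f'Reading files under {directory.name}')
        directory.cleanup()  # no-op for local paths; removes temp clones

    process(FakeTempDirectory('/opt/data'))    # local data source
    process(tempfile.TemporaryDirectory())     # remote/git data source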