General cleanup

jeremystretch 2023-01-27 13:38:52 -05:00
parent 77cc6e133f
commit b38b4e11a6
8 changed files with 77 additions and 102 deletions

View File: netbox/core/exceptions.py (new file)

@@ -0,0 +1,2 @@
+class SyncError(Exception):
+    pass
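
The new exception gives sync code a way to signal anticipated failures distinctly from genuine bugs, so callers can catch it narrowly. A minimal usage sketch (the function, message, and absolute import path are invented for illustration, not part of the commit):

    # Illustration only:
    from core.exceptions import SyncError

    def replicate_source(url):
        if not url:
            raise SyncError("No URL configured for this data source")
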

View File: netbox/core/jobs.py

@@ -2,6 +2,7 @@ import logging
 
 from extras.choices import JobResultStatusChoices
 from .choices import *
+from .exceptions import SyncError
 from .models import DataSource
 
 logger = logging.getLogger(__name__)
@@ -16,9 +17,8 @@ def sync_datasource(job_result, *args, **kwargs):
     try:
         job_result.start()
         datasource.sync()
-    except Exception:
+    except SyncError as e:
         job_result.set_status(JobResultStatusChoices.STATUS_ERRORED)
         job_result.save()
-        datasource.status = DataSourceStatusChoices.FAILED
-        datasource.save()
-        logging.error(f"Error during syncing of data source {datasource}")
+        DataSource.objects.filter(pk=datasource.pk).update(status=DataSourceStatusChoices.FAILED)
+        logging.error(e)
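
Catching SyncError instead of a bare Exception lets unexpected bugs propagate, and the queryset update() replaces save() so that only the status column is written. A rough sketch of the distinction (the table name and stored choice value are assumptions, not taken from the commit):

    # update() issues a targeted UPDATE of only the named columns:
    DataSource.objects.filter(pk=datasource.pk).update(status=DataSourceStatusChoices.FAILED)
    # ...roughly: UPDATE core_datasource SET status = 'failed' WHERE id = <pk>;
    # save() would instead write every field from the (possibly stale) in-memory instance.
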

View File: netbox/core/migrations/0001_initial.py

@@ -1,5 +1,6 @@
-# Generated by Django 4.1.5 on 2023-01-26 19:46
+# Generated by Django 4.1.5 on 2023-01-27 18:15
 
+import django.core.validators
 from django.db import migrations, models
 import django.db.models.deletion
@@ -16,15 +17,19 @@ class Migration(migrations.Migration):
             name='DataSource',
             fields=[
                 ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False)),
+                ('created', models.DateTimeField(auto_now_add=True, null=True)),
+                ('last_updated', models.DateTimeField(auto_now=True, null=True)),
                 ('name', models.CharField(max_length=100, unique=True)),
                 ('type', models.CharField(default='local', max_length=50)),
-                ('url', models.CharField(max_length=200)),
+                ('status', models.CharField(default='new', editable=False, max_length=50)),
                 ('enabled', models.BooleanField(default=True)),
                 ('description', models.CharField(blank=True, max_length=200)),
+                ('url', models.CharField(max_length=200)),
+                ('git_branch', models.CharField(blank=True, max_length=100)),
                 ('ignore_rules', models.TextField(blank=True)),
                 ('username', models.CharField(blank=True, max_length=100)),
                 ('password', models.CharField(blank=True, max_length=100)),
-                ('git_branch', models.CharField(blank=True, max_length=100)),
+                ('last_synced', models.DateTimeField(blank=True, editable=False, null=True)),
             ],
             options={
                 'ordering': ('name',),
@@ -34,10 +39,10 @@ class Migration(migrations.Migration):
             name='DataFile',
             fields=[
                 ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False)),
-                ('path', models.CharField(editable=False, max_length=1000, unique=True)),
+                ('path', models.CharField(editable=False, max_length=1000)),
                 ('last_updated', models.DateTimeField(editable=False)),
                 ('size', models.PositiveIntegerField(editable=False)),
-                ('hash', models.CharField(editable=False, max_length=64)),
+                ('hash', models.CharField(editable=False, max_length=64, validators=[django.core.validators.RegexValidator(message='Length must be 64 hexadecimal characters.', regex='^[0-9a-f]{64}$')])),
                 ('data', models.BinaryField()),
                 ('source', models.ForeignKey(editable=False, on_delete=django.db.models.deletion.CASCADE, related_name='datafiles', to='core.datasource')),
             ],
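
The three follow-up migrations deleted below were evidently folded back into 0001_initial, which is safe only because the core app's schema has presumably not shipped yet. Taken together with the model changes further down, the squashed schema supports a flow like this sketch (values invented; the 'git' type choice is inferred from fetch_git()):

    ds = DataSource(
        name='config-repo',
        type='git',
        url='https://git.example.com/configs.git',
        git_branch='main',
    )
    ds.full_clean()
    ds.save()
    ds.sync()   # raises SyncError if the source is not ready or the clone fails
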

View File: netbox/core/migrations/0002_datasource_last_synced.py (deleted)

@@ -1,18 +0,0 @@
-# Generated by Django 4.1.5 on 2023-01-26 20:11
-
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-
-    dependencies = [
-        ('core', '0001_initial'),
-    ]
-
-    operations = [
-        migrations.AddField(
-            model_name='datasource',
-            name='last_synced',
-            field=models.DateTimeField(blank=True, editable=False, null=True),
-        ),
-    ]

View File: netbox/core/migrations/0003_datasource_created_datasource_last_updated.py (deleted)

@@ -1,23 +0,0 @@
-# Generated by Django 4.1.5 on 2023-01-26 20:50
-
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-
-    dependencies = [
-        ('core', '0002_datasource_last_synced'),
-    ]
-
-    operations = [
-        migrations.AddField(
-            model_name='datasource',
-            name='created',
-            field=models.DateTimeField(auto_now_add=True, null=True),
-        ),
-        migrations.AddField(
-            model_name='datasource',
-            name='last_updated',
-            field=models.DateTimeField(auto_now=True, null=True),
-        ),
-    ]

View File: netbox/core/migrations/0004_datasource_status.py (deleted)

@@ -1,18 +0,0 @@
-# Generated by Django 4.1.5 on 2023-01-27 14:09
-
-from django.db import migrations, models
-
-
-class Migration(migrations.Migration):
-
-    dependencies = [
-        ('core', '0003_datasource_created_datasource_last_updated'),
-    ]
-
-    operations = [
-        migrations.AddField(
-            model_name='datasource',
-            name='status',
-            field=models.CharField(default='new', editable=False, max_length=50),
-        ),
-    ]

View File: netbox/core/models/data.py

@@ -6,6 +6,7 @@ from fnmatch import fnmatchcase
 from urllib.parse import quote, urlunparse, urlparse
 
 from django.contrib.contenttypes.models import ContentType
+from django.core.validators import RegexValidator
 from django.db import models
 from django.urls import reverse
 from django.utils import timezone
@@ -17,6 +18,8 @@ from netbox.models import ChangeLoggedModel
 from utilities.files import sha256_hash
 from utilities.querysets import RestrictedQuerySet
 from ..choices import *
+from ..exceptions import SyncError
+from ..utils import FakeTempDirectory
 
 __all__ = (
     'DataSource',
@@ -28,7 +31,7 @@ logger = logging.getLogger('netbox.core.data')
 class DataSource(ChangeLoggedModel):
     """
-    A remote source from which DataFiles are synchronized.
+    A remote source, such as a git repository, from which DataFiles are synchronized.
     """
     name = models.CharField(
         max_length=100,
@@ -119,14 +122,16 @@ class DataSource(ChangeLoggedModel):
         """
         Create/update/delete child DataFiles as necessary to synchronize with the remote source.
         """
+        if not self.ready_for_sync:
+            raise SyncError(f"Cannot initiate sync; data source not ready/enabled")
+
         self.status = DataSourceStatusChoices.SYNCING
-        self.save()
+        DataSource.objects.filter(pk=self.pk).update(status=self.status)
 
         # Replicate source data locally (if needed)
-        temp_dir = tempfile.TemporaryDirectory()
-        self.fetch(path=temp_dir.name)
+        local_path = self.fetch()
 
-        logger.debug(f'Syncing files from source root {temp_dir.name}')
+        logger.debug(f'Syncing files from source root {local_path.name}')
 
         data_files = self.datafiles.all()
         known_paths = {df.path for df in data_files}
         logger.debug(f'Starting with {len(known_paths)} known files')
@@ -137,7 +142,7 @@ class DataSource(ChangeLoggedModel):
         for datafile in data_files:
             try:
-                if datafile.refresh_from_disk(source_root=temp_dir.name):
+                if datafile.refresh_from_disk(source_root=local_path.name):
                     updated_files.append(datafile)
             except FileNotFoundError:
                 # File no longer exists
@@ -153,26 +158,26 @@ class DataSource(ChangeLoggedModel):
             logger.debug(f"Deleted {updated_count} files")
 
         # Walk the local replication to find new files
-        new_paths = self._walk(temp_dir.name) - known_paths
+        new_paths = self._walk(local_path.name) - known_paths
 
         # Bulk create new files
         new_datafiles = []
         for path in new_paths:
             datafile = DataFile(source=self, path=path)
-            datafile.refresh_from_disk(source_root=temp_dir.name)
+            datafile.refresh_from_disk(source_root=local_path.name)
             datafile.full_clean()
             new_datafiles.append(datafile)
 
         # TODO: Record last_updated?
         created_count = len(DataFile.objects.bulk_create(new_datafiles, batch_size=100))
         logger.debug(f"Created {created_count} data files")
 
         # Update status & last_synced time
         self.status = DataSourceStatusChoices.COMPLETED
         self.last_synced = timezone.now()
-        self.save()
+        self.last_updated = timezone.now()
+        DataSource.objects.filter(pk=self.pk).update(status=self.status, last_updated=self.last_updated)
 
-        temp_dir.cleanup()
+        local_path.cleanup()
 
-    def fetch(self, path):
+    def fetch(self):
         """
         Replicate the file structure from the remote data source and return the local path.
         """
@@ -182,19 +187,23 @@ class DataSource(ChangeLoggedModel):
         except AttributeError:
             raise NotImplemented(f"fetch() not yet supported for {self.get_type_display()} data sources")
 
-        return fetch_method(path)
+        return fetch_method()
 
-    def fetch_local(self, path):
+    def fetch_local(self):
         """
         Skip fetching for local paths; return the source path directly.
         """
         logger.debug(f"Data source type is local; skipping fetch")
-        return urlparse(self.url).path
+        local_path = urlparse(self.url).path
+
+        return FakeTempDirectory(local_path)
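
urlparse().path strips the scheme from a file:// URL, leaving only the filesystem path to wrap. An illustrative value (the path is invented):

    >>> from urllib.parse import urlparse
    >>> urlparse('file:///opt/netbox/local-data').path
    '/opt/netbox/local-data'
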
 
-    def fetch_git(self, path):
+    def fetch_git(self):
"""
Perform a shallow clone of the remote repository using the `git` executable.
"""
local_path = tempfile.TemporaryDirectory()
# Add authentication credentials to URL (if specified)
if self.username and self.password:
url_components = list(urlparse(self.url))
@@ -208,10 +217,17 @@ class DataSource(ChangeLoggedModel):
         args = ['git', 'clone', '--depth', '1']
         if self.git_branch:
             args.extend(['--branch', self.git_branch])
-        args.extend([url, path])
+        args.extend([url, local_path.name])
 
-        logger.debug(f"Cloning git repo: {''.join(args)}")
-        result = subprocess.run(args)
+        logger.debug(f"Cloning git repo: {' '.join(args)}")
+        try:
+            subprocess.run(args, check=True, capture_output=True)
+        except subprocess.CalledProcessError as e:
+            raise SyncError(
+                f"Fetching remote data failed: {e.stderr}"
+            )
 
+        return local_path
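
With check=True, a non-zero exit from git surfaces as CalledProcessError and is re-raised as a SyncError carrying git's stderr (a bytes object here, since text=True is not passed). The assembled command is a shallow, single-branch clone; a hypothetical example with a branch set (URL, credentials, and temp path invented):

    args = ['git', 'clone', '--depth', '1', '--branch', 'main',
            'https://user:secret@git.example.com/data.git', '/tmp/tmpl2x9yz']
    # ' '.join(args) -> "git clone --depth 1 --branch main https://... /tmp/tmpl2x9yz"
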
 
     def _walk(self, root):
         """
@@ -246,7 +262,8 @@ class DataSource(ChangeLoggedModel):
 
 class DataFile(models.Model):
     """
-    A database object which represents a remote file fetched from a DataSource.
+    The database representation of a remote file fetched from a remote DataSource. DataFile instances should be created,
+    updated, or deleted only by calling DataSource.sync().
     """
     source = models.ForeignKey(
         to='core.DataSource',
@@ -256,8 +273,8 @@ class DataFile(models.Model):
     )
     path = models.CharField(
         max_length=1000,
-        unique=True,
-        editable=False
+        editable=False,
+        help_text=_("File path relative to the data source's root")
     )
     last_updated = models.DateTimeField(
         editable=False
@@ -265,10 +282,13 @@ class DataFile(models.Model):
     size = models.PositiveIntegerField(
         editable=False
     )
     # TODO: Create a proper SHA256 field
     hash = models.CharField(
         max_length=64,
-        editable=False
+        editable=False,
+        validators=[
+            RegexValidator(regex='^[0-9a-f]{64}$', message=_("Length must be 64 hexadecimal characters."))
+        ],
+        help_text=_("SHA256 hash of the file data")
     )
     data = models.BinaryField()
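
The new validator pins hash to exactly 64 lowercase hexadecimal characters (the message mentions only length, though the pattern also rejects uppercase and non-hex input). An illustrative check, not part of the commit:

    from django.core.exceptions import ValidationError
    from django.core.validators import RegexValidator

    validate_sha256 = RegexValidator(regex='^[0-9a-f]{64}$', message="Length must be 64 hexadecimal characters.")
    validate_sha256('a' * 64)        # passes
    try:
        validate_sha256('A' * 64)    # raises ValidationError: uppercase is not matched
    except ValidationError:
        pass
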
@@ -286,27 +306,20 @@ class DataFile(models.Model):
     def __str__(self):
         return self.path
 
-    # def get_absolute_url(self):
-    #     return reverse('core:datafile', args=[self.pk])
-
     def refresh_from_disk(self, source_root):
         """
-        Update instance attributes from the file on disk. Returns True if any attribute
-        has changed.
+        Update instance attributes from the file on disk. Returns True if any attribute has changed.
         """
         file_path = os.path.join(source_root, self.path)
 
         # Get attributes from file on disk
-        file_size = os.path.getsize(file_path)
         file_hash = sha256_hash(file_path).hexdigest()
 
         # Update instance file attributes & data
-        has_changed = file_size != self.size or file_hash != self.hash
-        if has_changed:
+        if is_modified := file_hash != self.hash:
             self.last_updated = timezone.now()
-            self.size = file_size
+            self.size = os.path.getsize(file_path)
             self.hash = file_hash
             with open(file_path, 'rb') as f:
                 self.data = f.read()
 
-        return has_changed
+        return is_modified
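
refresh_from_disk() now treats the SHA256 digest as the sole change signal (the walrus operator requires Python 3.8+). sha256_hash() is imported from utilities.files; a minimal sketch of what such a helper plausibly looks like, assumed rather than taken from the commit, matching the .hexdigest() call above:

    import hashlib

    def sha256_hash(filepath):
        """Return a hashlib.sha256 object computed over the file's contents."""
        sha256 = hashlib.sha256()
        with open(filepath, 'rb') as f:
            for chunk in iter(lambda: f.read(8192), b''):
                sha256.update(chunk)
        return sha256
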

View File: netbox/core/utils.py (new file, 14 lines)

@@ -0,0 +1,14 @@
+__all__ = (
+    'FakeTempDirectory',
+)
+
+
+class FakeTempDirectory:
+    """
+    Mimic tempfile.TemporaryDirectory to represent a real local path.
+    """
+    def __init__(self, name):
+        self.name = name
+
+    def cleanup(self):
+        pass
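
Both tempfile.TemporaryDirectory and FakeTempDirectory expose .name and .cleanup(), so sync() can treat a cloned repository and a plain local directory uniformly; cleanup() on the fake is a deliberate no-op because the directory belongs to the operator, not to NetBox. An illustrative comparison (path invented):

    import tempfile

    real = tempfile.TemporaryDirectory()            # directory deleted on cleanup()
    fake = FakeTempDirectory('/opt/netbox-data')    # directory left intact on cleanup()

    for d in (real, fake):
        print(d.name)
        d.cleanup()
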