jeremystretch 2023-01-22 15:55:54 -05:00 committed by jeremystretch
parent c8779a839e
commit d59d883f8d
7 changed files with 283 additions and 0 deletions

netbox/core/__init__.py (new file, empty)

netbox/core/choices.py (new file)

@@ -0,0 +1,21 @@
from django.utils.translation import gettext as _

from utilities.choices import ChoiceSet


#
# Data sources
#

class DataSourceTypeChoices(ChoiceSet):
    LOCAL = 'local'
    HTTP = 'http'
    FTP = 'ftp'
    GIT = 'git'

    CHOICES = [
        (LOCAL, _('Local')),
        (HTTP, _('HTTP(S)')),
        (FTP, _('FTP(S)')),
        (GIT, _('Git')),
    ]
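Since the `DataSource.type` field (defined in models.py below) uses this class for its `choices`, the standard Django choice helpers apply. A minimal usage sketch, assuming NetBox's `ChoiceSet` yields ordinary `(value, label)` pairs; the instance values here are invented:

# Hypothetical sketch: the stored value is the short key; the label is
# resolved via Django's standard get_FOO_display() helper.
from core.choices import DataSourceTypeChoices
from core.models import DataSource

source = DataSource(name='example', type=DataSourceTypeChoices.GIT, url='https://example.com/repo.git')
assert source.type == 'git'
print(source.get_type_display())  # 'Git'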

netbox/core/migrations/0001_initial.py (new file)
@@ -0,0 +1,59 @@
# Generated by Django 4.1.5 on 2023-01-22 20:49

from django.db import migrations, models
import django.db.models.deletion
import taggit.managers

import utilities.json


class Migration(migrations.Migration):

    initial = True

    dependencies = [
        ('extras', '0084_staging'),
    ]

    operations = [
        migrations.CreateModel(
            name='DataSource',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False)),
                ('created', models.DateTimeField(auto_now_add=True, null=True)),
                ('last_updated', models.DateTimeField(auto_now=True, null=True)),
                ('custom_field_data', models.JSONField(blank=True, default=dict, encoder=utilities.json.CustomFieldJSONEncoder)),
                ('comments', models.TextField(blank=True)),
                ('name', models.CharField(max_length=100, unique=True)),
                ('type', models.CharField(default='local', max_length=50)),
                ('enabled', models.BooleanField(default=True)),
                ('description', models.CharField(blank=True, max_length=200)),
                ('url', models.URLField()),
                ('tags', taggit.managers.TaggableManager(through='extras.TaggedItem', to='extras.Tag')),
            ],
            options={
                'ordering': ('name',),
            },
        ),
        migrations.CreateModel(
            name='DataFile',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False)),
                ('created', models.DateTimeField(auto_now_add=True, null=True)),
                ('custom_field_data', models.JSONField(blank=True, default=dict, encoder=utilities.json.CustomFieldJSONEncoder)),
                ('path', models.CharField(max_length=1000, unique=True)),
                ('last_updated', models.DateTimeField()),
                ('size', models.PositiveIntegerField()),
                ('checksum', models.CharField(max_length=64)),
                ('data', models.BinaryField()),
                ('source', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='datafiles', to='core.datasource')),
                ('tags', taggit.managers.TaggableManager(through='extras.TaggedItem', to='extras.Tag')),
            ],
            options={
                'ordering': ('source', 'path'),
            },
        ),
        migrations.AddConstraint(
            model_name='datafile',
            constraint=models.UniqueConstraint(fields=('source', 'path'), name='core_datafile_unique_source_path'),
        ),
    ]
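The final `AddConstraint` operation enforces uniqueness of `(source, path)` at the database level. A hedged sketch of the effect, assuming the app is installed and migrated; all object values here are invented:

# Hypothetical sketch: a second DataFile with the same (source, path) is
# rejected by the core_datafile_unique_source_path constraint.
from django.db import IntegrityError, transaction
from django.utils import timezone

from core.models import DataFile, DataSource

src = DataSource.objects.create(name='configs', type='local', url='file:///opt/configs')
attrs = dict(source=src, path='routers/r1.cfg', last_updated=timezone.now(),
             size=0, checksum='0' * 64, data=b'')
DataFile.objects.create(**attrs)
try:
    with transaction.atomic():
        DataFile.objects.create(**attrs)
except IntegrityError:
    print('Duplicate (source, path) rejected')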

netbox/core/migrations/__init__.py (new file, empty)
netbox/core/models.py (new file)

@@ -0,0 +1,196 @@
import logging
import os
from functools import cached_property
from urllib.parse import urlparse

from django.conf import settings
from django.db import models
from django.urls import reverse
from django.utils import timezone
from django.utils.translation import gettext as _

from netbox.models import NetBoxModel, PrimaryModel
from utilities.files import sha256_checksum
from .choices import *

__all__ = (
    'DataSource',
    'DataFile',
)

logger = logging.getLogger('netbox.core.data')
class DataSource(PrimaryModel):
    """
    A remote source from which DataFiles are synchronized.
    """
    name = models.CharField(
        max_length=100,
        unique=True
    )
    type = models.CharField(
        max_length=50,
        choices=DataSourceTypeChoices,
        default=DataSourceTypeChoices.LOCAL
    )
    enabled = models.BooleanField(
        default=True
    )
    description = models.CharField(
        max_length=200,
        blank=True
    )
    url = models.URLField(
        verbose_name='URL'
    )

    class Meta:
        ordering = ('name',)
    # def get_absolute_url(self):
    #     return reverse('core:datasource', args=[self.pk])

    # @property
    # def root_path(self):
    #     if self.pk is None:
    #         return None
    #     if self.type == DataSourceTypeChoices.LOCAL:
    #         # Note: str.lstrip('file://') would strip characters, not the prefix;
    #         # use urlparse() to extract the filesystem path instead
    #         return urlparse(self.url).path
    #     return os.path.join(DATASOURCES_CACHE_PATH, str(self.pk))
    def sync(self):
        """
        Create/update/delete child DataFiles as necessary to synchronize with the remote source.
        """
        # Replicate source data locally (if needed)
        source_root = self.fetch()
        logger.debug(f'Syncing files from source root {source_root}')

        data_files = self.datafiles.all()
        known_paths = {df.path for df in data_files}

        # Check for any updated/deleted files
        updated_files = []
        deleted_file_ids = []
        for datafile in data_files:
            try:
                if datafile.refresh_from_disk(root_path=source_root):
                    updated_files.append(datafile)
            except FileNotFoundError:
                # File no longer exists
                deleted_file_ids.append(datafile.pk)
                continue

        # Bulk update modified files (persist every attribute set by refresh_from_disk())
        updated_count = DataFile.objects.bulk_update(updated_files, ('last_updated', 'size', 'checksum', 'data'))
        logger.debug(f"Updated {updated_count} data files")

        # Bulk delete deleted files
        deleted_count, _ = DataFile.objects.filter(pk__in=deleted_file_ids).delete()
        logger.debug(f"Deleted {deleted_count} data files")

        # Walk the local replication to find new files
        new_paths = self._walk(source_root) - known_paths

        # Bulk create new files
        new_datafiles = []
        for path in new_paths:
            datafile = DataFile(source=self, path=path)
            datafile.refresh_from_disk(root_path=source_root)
            new_datafiles.append(datafile)
            # TODO: Record last_updated?
        created_count = len(DataFile.objects.bulk_create(new_datafiles, batch_size=100))
        logger.debug(f"Created {created_count} data files")
    def fetch(self):
        """
        Replicate the file structure from the remote data source and return the local path.
        """
        logger.debug(f"Fetching source data for {self}")

        if self.type == DataSourceTypeChoices.LOCAL:
            logger.debug("Data source type is local; skipping fetch")
            # No replication is necessary for local sources
            return urlparse(self.url).path

        # TODO: Sync remote files to a tempfile.TemporaryDirectory
        raise NotImplementedError(f"fetch() not yet supported for {self.get_type_display()} data sources")
    def _walk(self, root_path):
        """
        Return a set of all non-excluded files within the root path.
        """
        paths = set()

        for path, dir_names, file_names in os.walk(root_path):
            path = path.split(root_path)[1].lstrip('/')  # Strip root path
            if path.startswith('.'):
                continue
            for file_name in file_names:
                # TODO: Apply include/exclude rules
                if file_name.startswith('.'):
                    continue
                paths.add(os.path.join(path, file_name))

        logger.debug(f"Found {len(paths)} files")
        return paths
class DataFile(NetBoxModel):
    """
    A database object which represents a remote file fetched from a DataSource.
    """
    source = models.ForeignKey(
        to='core.DataSource',
        on_delete=models.CASCADE,
        related_name='datafiles'
    )
    path = models.CharField(
        max_length=1000,
        unique=True
    )
    last_updated = models.DateTimeField()
    size = models.PositiveIntegerField()
    # TODO: Create a proper SHA256 field
    checksum = models.CharField(
        max_length=64
    )
    data = models.BinaryField()

    class Meta:
        ordering = ('source', 'path')
        constraints = (
            models.UniqueConstraint(
                fields=('source', 'path'),
                name='%(app_label)s_%(class)s_unique_source_path'
            ),
        )

    # def get_absolute_url(self):
    #     return reverse('core:datafile', args=[self.pk])

    def refresh_from_disk(self, root_path):
        """
        Update instance attributes from the file on disk. Returns True if any attribute
        has changed.
        """
        file_path = os.path.join(root_path, self.path)

        # Get attributes from file on disk
        file_size = os.path.getsize(file_path)
        file_checksum = sha256_checksum(file_path)

        # Update instance file attributes & data
        has_changed = file_size != self.size or file_checksum != self.checksum
        if has_changed:
            self.last_updated = timezone.now()
            self.size = file_size
            self.checksum = file_checksum
            with open(file_path, 'rb') as f:
                self.data = f.read()

        return has_changed
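Taken together, `fetch()`, `_walk()`, and `refresh_from_disk()` give `sync()` a simple create/update/delete reconciliation loop. A hypothetical end-to-end sketch for a local source; the name and directory here are invented:

# Hypothetical usage sketch: synchronize a local directory into DataFile rows.
from core.models import DataSource

ds = DataSource.objects.create(
    name='device-configs',
    type='local',
    url='file:///opt/device-configs',  # fetch() returns urlparse(url).path for local sources
)
ds.sync()

for df in ds.datafiles.all():
    print(df.path, df.size, df.checksum)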

netbox/netbox/settings.py
@@ -331,6 +331,7 @@ INSTALLED_APPS = [
    'social_django',
    'taggit',
    'timezone_field',
    'core',
    'circuits',
    'dcim',
    'ipam',

netbox/utilities/files.py (new file)
@@ -0,0 +1,6 @@
import hashlib


def sha256_checksum(filepath):
    """
    Return the SHA256 hex digest of the file at the given path.
    """
    sha256 = hashlib.sha256()
    # Read in chunks to avoid loading large files into memory at once
    with open(filepath, 'rb') as f:
        for chunk in iter(lambda: f.read(65536), b''):
            sha256.update(chunk)
    return sha256.hexdigest()
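A quick way to sanity-check the helper against a known digest (the temporary file path is generated at runtime):

# Example: the SHA256 of b'hello\n' is a well-known test vector.
import tempfile

from utilities.files import sha256_checksum

with tempfile.NamedTemporaryFile(delete=False) as tmp:
    tmp.write(b'hello\n')

print(sha256_checksum(tmp.name))
# 5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03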