Closes #11558: Add support for remote data sources (#11646)

* WIP

* WIP

* Add git sync

* Fix file hashing

* Add last_synced to DataSource

* Build out UI & API resources

* Add status field to DataSource

* Add UI control to sync data source

* Add API endpoint to sync data sources

* Fix display of DataSource job results

* DataSource password should be write-only

* General cleanup

* Add data file UI view

* Punt on HTTP, FTP support for now

* Add DataSource URL validation

* Add HTTP proxy support to git fetcher

* Add management command to sync data sources

* DataFile REST API endpoints should be read-only

* Refactor fetch methods into backend classes

* Replace auth & git branch fields with general-purpose parameters

* Fix last_synced time

* Render discrete form fields for backend parameters

* Enable dynamic edit form for DataSource

* Register DataBackend classes in application registry

* Add search indexers for DataSource, DataFile

* Add single & bulk delete views for DataFile

* Add model documentation

* Convert DataSource to a primary model

* Introduce pre_sync & post_sync signals

* Clean up migrations

* Rename url to source_url

* Clean up filtersets

* Add API & filterset tests

* Add view tests

* Add initSelect() to HTMX refresh handler

* Render DataSourceForm fieldsets dynamically

* Update compiled static resources
This commit is contained in:
Jeremy Stretch
2023-02-02 10:06:23 -05:00
committed by jeremystretch
parent e65b2a9fb3
commit d8784d4155
53 changed files with 1865 additions and 14 deletions

View File

View File

@@ -0,0 +1,93 @@
from django.urls import reverse
from django.utils import timezone
from utilities.testing import APITestCase, APIViewTestCases
from ..choices import *
from ..models import *
class AppTest(APITestCase):
def test_root(self):
url = reverse('core-api:api-root')
response = self.client.get('{}?format=api'.format(url), **self.header)
self.assertEqual(response.status_code, 200)
class DataSourceTest(APIViewTestCases.APIViewTestCase):
model = DataSource
brief_fields = ['display', 'id', 'name', 'url']
bulk_update_data = {
'enabled': False,
'description': 'foo bar baz',
}
@classmethod
def setUpTestData(cls):
data_sources = (
DataSource(name='Data Source 1', type=DataSourceTypeChoices.LOCAL, source_url='file:///var/tmp/source1/'),
DataSource(name='Data Source 2', type=DataSourceTypeChoices.LOCAL, source_url='file:///var/tmp/source2/'),
DataSource(name='Data Source 3', type=DataSourceTypeChoices.LOCAL, source_url='file:///var/tmp/source3/'),
)
DataSource.objects.bulk_create(data_sources)
cls.create_data = [
{
'name': 'Data Source 4',
'type': DataSourceTypeChoices.GIT,
'source_url': 'https://example.com/git/source4'
},
{
'name': 'Data Source 5',
'type': DataSourceTypeChoices.GIT,
'source_url': 'https://example.com/git/source5'
},
{
'name': 'Data Source 6',
'type': DataSourceTypeChoices.GIT,
'source_url': 'https://example.com/git/source6'
},
]
class DataFileTest(
APIViewTestCases.GetObjectViewTestCase,
APIViewTestCases.ListObjectsViewTestCase,
APIViewTestCases.GraphQLTestCase
):
model = DataFile
brief_fields = ['display', 'id', 'path', 'url']
@classmethod
def setUpTestData(cls):
datasource = DataSource.objects.create(
name='Data Source 1',
type=DataSourceTypeChoices.LOCAL,
source_url='file:///var/tmp/source1/'
)
data_files = (
DataFile(
source=datasource,
path='dir1/file1.txt',
last_updated=timezone.now(),
size=1000,
hash='442da078f0111cbdf42f21903724f6597c692535f55bdfbbea758a1ae99ad9e1'
),
DataFile(
source=datasource,
path='dir1/file2.txt',
last_updated=timezone.now(),
size=2000,
hash='a78168c7c97115bafd96450ed03ea43acec495094c5caa28f0d02e20e3a76cc2'
),
DataFile(
source=datasource,
path='dir1/file3.txt',
last_updated=timezone.now(),
size=3000,
hash='12b8827a14c4d5a2f30b6c6e2b7983063988612391c6cbe8ee7493b59054827a'
),
)
DataFile.objects.bulk_create(data_files)

View File

@@ -0,0 +1,120 @@
from datetime import datetime
from django.test import TestCase
from django.utils import timezone
from utilities.testing import ChangeLoggedFilterSetTests
from ..choices import *
from ..filtersets import *
from ..models import *
class DataSourceTestCase(TestCase, ChangeLoggedFilterSetTests):
queryset = DataSource.objects.all()
filterset = DataSourceFilterSet
@classmethod
def setUpTestData(cls):
data_sources = (
DataSource(
name='Data Source 1',
type=DataSourceTypeChoices.LOCAL,
source_url='file:///var/tmp/source1/',
status=DataSourceStatusChoices.NEW,
enabled=True
),
DataSource(
name='Data Source 2',
type=DataSourceTypeChoices.LOCAL,
source_url='file:///var/tmp/source2/',
status=DataSourceStatusChoices.SYNCING,
enabled=True
),
DataSource(
name='Data Source 3',
type=DataSourceTypeChoices.GIT,
source_url='https://example.com/git/source3',
status=DataSourceStatusChoices.COMPLETED,
enabled=False
),
)
DataSource.objects.bulk_create(data_sources)
def test_name(self):
params = {'name': ['Data Source 1', 'Data Source 2']}
self.assertEqual(self.filterset(params, self.queryset).qs.count(), 2)
def test_type(self):
params = {'type': [DataSourceTypeChoices.LOCAL]}
self.assertEqual(self.filterset(params, self.queryset).qs.count(), 2)
def test_enabled(self):
params = {'enabled': 'true'}
self.assertEqual(self.filterset(params, self.queryset).qs.count(), 2)
params = {'enabled': 'false'}
self.assertEqual(self.filterset(params, self.queryset).qs.count(), 1)
def test_status(self):
params = {'status': [DataSourceStatusChoices.NEW, DataSourceStatusChoices.SYNCING]}
self.assertEqual(self.filterset(params, self.queryset).qs.count(), 2)
class DataFileTestCase(TestCase, ChangeLoggedFilterSetTests):
queryset = DataFile.objects.all()
filterset = DataFileFilterSet
@classmethod
def setUpTestData(cls):
data_sources = (
DataSource(name='Data Source 1', type=DataSourceTypeChoices.LOCAL, source_url='file:///var/tmp/source1/'),
DataSource(name='Data Source 2', type=DataSourceTypeChoices.LOCAL, source_url='file:///var/tmp/source2/'),
DataSource(name='Data Source 3', type=DataSourceTypeChoices.LOCAL, source_url='file:///var/tmp/source3/'),
)
DataSource.objects.bulk_create(data_sources)
data_files = (
DataFile(
source=data_sources[0],
path='dir1/file1.txt',
last_updated=datetime(2023, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
size=1000,
hash='442da078f0111cbdf42f21903724f6597c692535f55bdfbbea758a1ae99ad9e1'
),
DataFile(
source=data_sources[1],
path='dir1/file2.txt',
last_updated=datetime(2023, 1, 2, 0, 0, 0, tzinfo=timezone.utc),
size=2000,
hash='a78168c7c97115bafd96450ed03ea43acec495094c5caa28f0d02e20e3a76cc2'
),
DataFile(
source=data_sources[2],
path='dir1/file3.txt',
last_updated=datetime(2023, 1, 3, 0, 0, 0, tzinfo=timezone.utc),
size=3000,
hash='12b8827a14c4d5a2f30b6c6e2b7983063988612391c6cbe8ee7493b59054827a'
),
)
DataFile.objects.bulk_create(data_files)
def test_source(self):
sources = DataSource.objects.all()
params = {'source_id': [sources[0].pk, sources[1].pk]}
self.assertEqual(self.filterset(params, self.queryset).qs.count(), 2)
params = {'source': [sources[0].name, sources[1].name]}
self.assertEqual(self.filterset(params, self.queryset).qs.count(), 2)
def test_path(self):
params = {'path': ['dir1/file1.txt', 'dir1/file2.txt']}
self.assertEqual(self.filterset(params, self.queryset).qs.count(), 2)
def test_size(self):
params = {'size': [1000, 2000]}
self.assertEqual(self.filterset(params, self.queryset).qs.count(), 2)
def test_hash(self):
params = {'hash': [
'442da078f0111cbdf42f21903724f6597c692535f55bdfbbea758a1ae99ad9e1',
'a78168c7c97115bafd96450ed03ea43acec495094c5caa28f0d02e20e3a76cc2',
]}
self.assertEqual(self.filterset(params, self.queryset).qs.count(), 2)

View File

@@ -0,0 +1,91 @@
from django.utils import timezone
from utilities.testing import ViewTestCases, create_tags
from ..choices import *
from ..models import *
class DataSourceTestCase(ViewTestCases.PrimaryObjectViewTestCase):
model = DataSource
@classmethod
def setUpTestData(cls):
data_sources = (
DataSource(name='Data Source 1', type=DataSourceTypeChoices.LOCAL, source_url='file:///var/tmp/source1/'),
DataSource(name='Data Source 2', type=DataSourceTypeChoices.LOCAL, source_url='file:///var/tmp/source2/'),
DataSource(name='Data Source 3', type=DataSourceTypeChoices.LOCAL, source_url='file:///var/tmp/source3/'),
)
DataSource.objects.bulk_create(data_sources)
tags = create_tags('Alpha', 'Bravo', 'Charlie')
cls.form_data = {
'name': 'Data Source X',
'type': DataSourceTypeChoices.GIT,
'source_url': 'http:///exmaple/com/foo/bar/',
'description': 'Something',
'comments': 'Foo bar baz',
'tags': [t.pk for t in tags],
}
cls.csv_data = (
f"name,type,source_url,enabled",
f"Data Source 4,{DataSourceTypeChoices.LOCAL},file:///var/tmp/source4/,true",
f"Data Source 5,{DataSourceTypeChoices.LOCAL},file:///var/tmp/source4/,true",
f"Data Source 6,{DataSourceTypeChoices.GIT},http:///exmaple/com/foo/bar/,false",
)
cls.csv_update_data = (
"id,name,description",
f"{data_sources[0].pk},Data Source 7,New description7",
f"{data_sources[1].pk},Data Source 8,New description8",
f"{data_sources[2].pk},Data Source 9,New description9",
)
cls.bulk_edit_data = {
'enabled': False,
'description': 'New description',
}
class DataFileTestCase(
ViewTestCases.GetObjectViewTestCase,
ViewTestCases.GetObjectChangelogViewTestCase,
ViewTestCases.DeleteObjectViewTestCase,
ViewTestCases.ListObjectsViewTestCase,
ViewTestCases.BulkDeleteObjectsViewTestCase,
):
model = DataFile
@classmethod
def setUpTestData(cls):
datasource = DataSource.objects.create(
name='Data Source 1',
type=DataSourceTypeChoices.LOCAL,
source_url='file:///var/tmp/source1/'
)
data_files = (
DataFile(
source=datasource,
path='dir1/file1.txt',
last_updated=timezone.now(),
size=1000,
hash='442da078f0111cbdf42f21903724f6597c692535f55bdfbbea758a1ae99ad9e1'
),
DataFile(
source=datasource,
path='dir1/file2.txt',
last_updated=timezone.now(),
size=2000,
hash='a78168c7c97115bafd96450ed03ea43acec495094c5caa28f0d02e20e3a76cc2'
),
DataFile(
source=datasource,
path='dir1/file3.txt',
last_updated=timezone.now(),
size=3000,
hash='12b8827a14c4d5a2f30b6c6e2b7983063988612391c6cbe8ee7493b59054827a'
),
)
DataFile.objects.bulk_create(data_files)