Adds csv dialect detection to bulk import view (#13563)

* adds csv dialect detection to bulk import view #13239

* adds sane delimiters for dialect detection #13239

* adds csv delimiter tests #13239

* adds csv delimiter on the form

* pass delimiter to clean_csv method #13239

* fix tests for csv import #13239

* fix tests for csv import #13239

* fix tests for csv import #13239

* fix tests for csv import #13239

* Improve auto-detection of import data format

* Misc cleanup

* Include tab as a supported delimiting character for auto-detection

* Move delimiting chars to a separate constant for easy reference

---------

Co-authored-by: Jeremy Stretch <jstretch@netboxlabs.com>
This commit is contained in:
Abhimanyu Saharan 2023-09-13 02:18:40 +05:30 committed by GitHub
parent 39cb9c32d6
commit b7cfb2f7d9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 123 additions and 15 deletions

View File

@ -17,7 +17,7 @@ from dcim.constants import *
from dcim.models import *
from ipam.models import ASN, RIR, VLAN, VRF
from tenancy.models import Tenant
from utilities.choices import ImportFormatChoices
from utilities.choices import CSVDelimiterChoices, ImportFormatChoices
from utilities.testing import ViewTestCases, create_tags, create_test_device, post_data
from wireless.models import WirelessLAN
@ -2014,6 +2014,7 @@ class ModuleTestCase(
'data': {
'data': '\n'.join(csv_data),
'format': ImportFormatChoices.CSV,
'csv_delimiter': CSVDelimiterChoices.AUTO,
}
}
@ -2030,6 +2031,7 @@ class ModuleTestCase(
'data': {
'data': '\n'.join(csv_data),
'format': ImportFormatChoices.CSV,
'csv_delimiter': CSVDelimiterChoices.AUTO,
}
}
@ -2106,6 +2108,7 @@ class ModuleTestCase(
'data': {
'data': '\n'.join(csv_data),
'format': ImportFormatChoices.CSV,
'csv_delimiter': CSVDelimiterChoices.AUTO,
}
}

View File

@ -12,6 +12,7 @@ from dcim.models import Manufacturer, Rack, Site
from extras.choices import *
from extras.models import CustomField, CustomFieldChoiceSet
from ipam.models import VLAN
from utilities.choices import CSVDelimiterChoices, ImportFormatChoices
from utilities.testing import APITestCase, TestCase
from virtualization.models import VirtualMachine
@ -1176,7 +1177,11 @@ class CustomFieldImportTest(TestCase):
)
csv_data = '\n'.join(','.join(row) for row in data)
response = self.client.post(reverse('dcim:site_import'), {'data': csv_data, 'format': 'csv'})
response = self.client.post(reverse('dcim:site_import'), {
'data': csv_data,
'format': ImportFormatChoices.CSV,
'csv_delimiter': CSVDelimiterChoices.AUTO,
})
self.assertEqual(response.status_code, 302)
self.assertEqual(Site.objects.count(), 3)

View File

@ -3,7 +3,7 @@ from django.test import override_settings
from dcim.models import *
from users.models import ObjectPermission
from utilities.choices import ImportFormatChoices
from utilities.choices import CSVDelimiterChoices, ImportFormatChoices
from utilities.testing import ModelViewTestCase, create_tags
@ -30,6 +30,7 @@ class CSVImportTestCase(ModelViewTestCase):
data = {
'format': ImportFormatChoices.CSV,
'data': self._get_csv_data(csv_data),
'csv_delimiter': CSVDelimiterChoices.AUTO,
}
# Assign model-level permission

View File

@ -45,6 +45,7 @@ Context:
<input type="hidden" name="import_method" value="direct" />
{% render_field form.data %}
{% render_field form.format %}
{% render_field form.csv_delimiter %}
<div class="form-group">
<div class="col col-md-12 text-end">
<button type="submit" name="data_submit" class="btn btn-primary">{% trans "Submit" %}</button>

View File

@ -1,6 +1,8 @@
from django.conf import settings
from django.utils.translation import gettext_lazy as _
from .constants import CSV_DELIMITERS
class ChoiceSetMeta(type):
"""
@ -230,3 +232,17 @@ class ImportFormatChoices(ChoiceSet):
(JSON, 'JSON'),
(YAML, 'YAML'),
]
class CSVDelimiterChoices(ChoiceSet):
AUTO = 'auto'
COMMA = CSV_DELIMITERS['comma']
SEMICOLON = CSV_DELIMITERS['semicolon']
TAB = CSV_DELIMITERS['tab']
CHOICES = [
(AUTO, _('Auto-detect')),
(COMMA, _('Comma')),
(SEMICOLON, _('Semicolon')),
(TAB, _('Tab')),
]

View File

@ -58,3 +58,14 @@ HTTP_REQUEST_META_SAFE_COPY = [
'SERVER_NAME',
'SERVER_PORT',
]
#
# CSV-style format delimiters
#
CSV_DELIMITERS = {
'comma': ',',
'semicolon': ';',
'tab': '\t',
}

View File

@ -7,10 +7,10 @@ from django import forms
from django.utils.translation import gettext as _
from core.forms.mixins import SyncedDataMixin
from utilities.choices import ImportFormatChoices
from utilities.choices import CSVDelimiterChoices, ImportFormatChoices, ImportMethodChoices
from utilities.constants import CSV_DELIMITERS
from utilities.forms.utils import parse_csv
from .mixins import BootstrapMixin
from ..choices import ImportMethodChoices
class BulkImportForm(BootstrapMixin, SyncedDataMixin, forms.Form):
@ -24,13 +24,20 @@ class BulkImportForm(BootstrapMixin, SyncedDataMixin, forms.Form):
help_text=_("Enter object data in CSV, JSON or YAML format.")
)
upload_file = forms.FileField(
label="Data file",
label=_("Data file"),
required=False
)
format = forms.ChoiceField(
choices=ImportFormatChoices,
initial=ImportFormatChoices.AUTO
)
csv_delimiter = forms.ChoiceField(
choices=CSVDelimiterChoices,
initial=CSVDelimiterChoices.AUTO,
label=_("CSV delimiter"),
help_text=_("The character which delimits CSV fields. Applies only to CSV format."),
required=False
)
data_field = 'data'
@ -54,13 +61,18 @@ class BulkImportForm(BootstrapMixin, SyncedDataMixin, forms.Form):
# Determine the data format
if self.cleaned_data['format'] == ImportFormatChoices.AUTO:
format = self._detect_format(data)
if self.cleaned_data['csv_delimiter'] != CSVDelimiterChoices.AUTO:
# Specifying the CSV delimiter implies CSV format
format = ImportFormatChoices.CSV
else:
format = self._detect_format(data)
else:
format = self.cleaned_data['format']
# Process data according to the selected format
if format == ImportFormatChoices.CSV:
self.cleaned_data['data'] = self._clean_csv(data)
delimiter = self.cleaned_data.get('csv_delimiter', CSVDelimiterChoices.AUTO)
self.cleaned_data['data'] = self._clean_csv(data, delimiter=delimiter)
elif format == ImportFormatChoices.JSON:
self.cleaned_data['data'] = self._clean_json(data)
elif format == ImportFormatChoices.YAML:
@ -78,7 +90,10 @@ class BulkImportForm(BootstrapMixin, SyncedDataMixin, forms.Form):
return ImportFormatChoices.JSON
if data.startswith('---') or data.startswith('- '):
return ImportFormatChoices.YAML
if ',' in data.split('\n', 1)[0]:
# Look for any of the CSV delimiters in the first line (ignoring the default 'auto' choice)
first_line = data.split('\n', 1)[0]
csv_delimiters = CSV_DELIMITERS.values()
if any(x in first_line for x in csv_delimiters):
return ImportFormatChoices.CSV
except IndexError:
pass
@ -86,12 +101,31 @@ class BulkImportForm(BootstrapMixin, SyncedDataMixin, forms.Form):
'format': _('Unable to detect data format. Please specify.')
})
def _clean_csv(self, data):
def _clean_csv(self, data, delimiter=CSVDelimiterChoices.AUTO):
"""
Clean CSV-formatted data. The first row will be treated as column headers.
"""
# Determine the CSV dialect
if delimiter == CSVDelimiterChoices.AUTO:
# This uses a rough heuristic to detect the CSV dialect based on the presence of supported delimiting
# characters. If the data is malformed, we'll fall back to the default Excel dialect.
delimiters = ''.join(CSV_DELIMITERS.values())
try:
dialect = csv.Sniffer().sniff(data.strip(), delimiters=delimiters)
except csv.Error:
dialect = csv.excel
elif delimiter in (CSVDelimiterChoices.COMMA, CSVDelimiterChoices.SEMICOLON):
dialect = csv.excel
dialect.delimiter = delimiter
elif delimiter == CSVDelimiterChoices.TAB:
dialect = csv.excel_tab
else:
raise forms.ValidationError({
'csv_delimiter': _('Invalid CSV delimiter'),
})
stream = StringIO(data.strip())
reader = csv.reader(stream)
reader = csv.reader(stream, dialect=dialect)
headers, records = parse_csv(reader)
# Set CSV headers for reference by the model form

View File

@ -11,7 +11,7 @@ from extras.choices import ObjectChangeActionChoices
from extras.models import ObjectChange
from netbox.models.features import ChangeLoggingMixin
from users.models import ObjectPermission
from utilities.choices import ImportFormatChoices
from utilities.choices import CSVDelimiterChoices, ImportFormatChoices
from .base import ModelTestCase
from .utils import disable_warnings, post_data
@ -580,7 +580,8 @@ class ViewTestCases:
def test_bulk_import_objects_without_permission(self):
data = {
'data': self._get_csv_data(),
'format': 'csv',
'format': ImportFormatChoices.CSV,
'csv_delimiter': CSVDelimiterChoices.AUTO,
}
# Test GET without permission
@ -597,7 +598,8 @@ class ViewTestCases:
initial_count = self._get_queryset().count()
data = {
'data': self._get_csv_data(),
'format': 'csv',
'format': ImportFormatChoices.CSV,
'csv_delimiter': CSVDelimiterChoices.AUTO,
}
# Assign model-level permission
@ -626,6 +628,7 @@ class ViewTestCases:
data = {
'format': ImportFormatChoices.CSV,
'data': csv_data,
'csv_delimiter': CSVDelimiterChoices.AUTO,
}
# Assign model-level permission
@ -658,7 +661,8 @@ class ViewTestCases:
initial_count = self._get_queryset().count()
data = {
'data': self._get_csv_data(),
'format': 'csv',
'format': ImportFormatChoices.CSV,
'csv_delimiter': CSVDelimiterChoices.AUTO,
}
# Assign constrained permission

View File

@ -331,3 +331,36 @@ class ImportFormTest(TestCase):
form._detect_format('')
with self.assertRaises(forms.ValidationError):
form._detect_format('?')
def test_csv_delimiters(self):
form = BulkImportForm()
data = (
"a,b,c\n"
"1,2,3\n"
"4,5,6\n"
)
self.assertEqual(form._clean_csv(data, delimiter=','), [
{'a': '1', 'b': '2', 'c': '3'},
{'a': '4', 'b': '5', 'c': '6'},
])
data = (
"a;b;c\n"
"1;2;3\n"
"4;5;6\n"
)
self.assertEqual(form._clean_csv(data, delimiter=';'), [
{'a': '1', 'b': '2', 'c': '3'},
{'a': '4', 'b': '5', 'c': '6'},
])
data = (
"a\tb\tc\n"
"1\t2\t3\n"
"4\t5\t6\n"
)
self.assertEqual(form._clean_csv(data, delimiter='\t'), [
{'a': '1', 'b': '2', 'c': '3'},
{'a': '4', 'b': '5', 'c': '6'},
])