Add support for CSV-semicolon and TSV

This commit is contained in:
Per von Zweigbergk 2023-09-05 10:23:43 +02:00
parent 15c36514e6
commit df3a5cd422

View File

@@ -21,7 +21,11 @@ class BulkImportForm(BootstrapMixin, SyncedDataMixin, forms.Form):
data = forms.CharField( data = forms.CharField(
required=False, required=False,
widget=forms.Textarea(attrs={'class': 'font-monospace'}), widget=forms.Textarea(attrs={'class': 'font-monospace'}),
help_text=_("Enter object data in CSV, JSON or YAML format.") help_text=_("Enter object data in CSV, JSON or YAML format."),
# Do not let Django strip data, because this can mess with TSV files.
# When the last column of the last row is empty, the TSV will end with
# a '\t' and that should not be stripped out!
strip=False,
) )
upload_file = forms.FileField( upload_file = forms.FileField(
label="Data file", label="Data file",
@@ -61,6 +65,10 @@ class BulkImportForm(BootstrapMixin, SyncedDataMixin, forms.Form):
# Process data according to the selected format # Process data according to the selected format
if format == ImportFormatChoices.CSV: if format == ImportFormatChoices.CSV:
self.cleaned_data['data'] = self._clean_csv(data) self.cleaned_data['data'] = self._clean_csv(data)
elif format == ImportFormatChoices.CSV_SEMICOLON:
self.cleaned_data['data'] = self._clean_csv(data, delimiter=';')
elif format == ImportFormatChoices.TSV:
self.cleaned_data['data'] = self._clean_csv(data, dialect='excel-tab')
elif format == ImportFormatChoices.JSON: elif format == ImportFormatChoices.JSON:
self.cleaned_data['data'] = self._clean_json(data) self.cleaned_data['data'] = self._clean_json(data)
elif format == ImportFormatChoices.YAML: elif format == ImportFormatChoices.YAML:
@@ -78,20 +86,26 @@ class BulkImportForm(BootstrapMixin, SyncedDataMixin, forms.Form):
return ImportFormatChoices.JSON return ImportFormatChoices.JSON
if data.startswith('---') or data.startswith('- '): if data.startswith('---') or data.startswith('- '):
return ImportFormatChoices.YAML return ImportFormatChoices.YAML
if ',' in data.split('\n', 1)[0]: first_line = data.split('\n', 1)[0]
if ',' in first_line:
return ImportFormatChoices.CSV return ImportFormatChoices.CSV
if ';' in first_line:
return ImportFormatChoices.CSV_SEMICOLON
if '\t' in first_line:
return ImportFormatChoices.TSV
except IndexError: except IndexError:
pass pass
raise forms.ValidationError({ raise forms.ValidationError({
'format': _('Unable to detect data format. Please specify.') 'format': _('Unable to detect data format. Please specify.')
}) })
def _clean_csv(self, data): def _clean_csv(self, data, **csv_reader_kwargs):
""" """
Clean CSV-formatted data. The first row will be treated as column headers. Clean CSV-formatted data. The first row will be treated as column headers.
""" """
stream = StringIO(data.strip()) # Strip spaces and newlines only, leave tabs alone because they are significant in TSV mode
reader = csv.reader(stream) stream = StringIO(data.strip(' \n'))
reader = csv.reader(stream, **csv_reader_kwargs)
headers, records = parse_csv(reader) headers, records = parse_csv(reader)
# Set CSV headers for reference by the model form # Set CSV headers for reference by the model form