Pass delimiter to the _clean_csv method #13239

This commit is contained in:
Abhimanyu Saharan 2023-08-30 19:11:17 +05:30
parent 81024af024
commit 74f3cd7466
2 changed files with 19 additions and 15 deletions

View File

@ -67,7 +67,7 @@ class BulkImportForm(BootstrapMixin, SyncedDataMixin, forms.Form):
# Process data according to the selected format # Process data according to the selected format
if format == ImportFormatChoices.CSV: if format == ImportFormatChoices.CSV:
self.cleaned_data['data'] = self._clean_csv(data) self.cleaned_data['data'] = self._clean_csv(data, delimiter=self.cleaned_data['csv_delimiter'])
elif format == ImportFormatChoices.JSON: elif format == ImportFormatChoices.JSON:
self.cleaned_data['data'] = self._clean_json(data) self.cleaned_data['data'] = self._clean_json(data)
elif format == ImportFormatChoices.YAML: elif format == ImportFormatChoices.YAML:
@ -93,26 +93,30 @@ class BulkImportForm(BootstrapMixin, SyncedDataMixin, forms.Form):
'format': _('Unable to detect data format. Please specify.') 'format': _('Unable to detect data format. Please specify.')
}) })
def _clean_csv(self, data): def _clean_csv(self, data, delimiter=None):
""" """
Clean CSV-formatted data. The first row will be treated as column headers. Clean CSV-formatted data. The first row will be treated as column headers.
""" """
if self.cleaned_data['csv_delimiter'] == CSVDelimiterChoices.AUTO: if delimiter == CSVDelimiterChoices.AUTO:
# Determine the CSV dialect # Determine the CSV dialect
try: try:
# This uses a rough heuristic to detect the CSV dialect. If the data is malformed, we'll fall back to # This uses a rough heuristic to detect the CSV dialect. If the data is malformed, we'll fall back to
# the default Excel dialect. # the default Excel dialect. Note that delimiter can only be one character.
dialect = csv.Sniffer().sniff( dialect = csv.Sniffer().sniff(
data.strip(), delimiters=''.join( data.strip(), delimiters=''.join([CSVDelimiterChoices.COMMA, CSVDelimiterChoices.SEMICOLON])
[CSVDelimiterChoices.COMMA, CSVDelimiterChoices.SEMICOLON, CSVDelimiterChoices.TAB]
)
) )
except csv.Error: except csv.Error:
dialect = csv.excel dialect = csv.excel
else: elif delimiter in [CSVDelimiterChoices.COMMA, CSVDelimiterChoices.SEMICOLON]:
dialect = csv.excel dialect = csv.excel
dialect.delimiter = self.cleaned_data['csv_delimiter'] dialect.delimiter = delimiter
elif delimiter == CSVDelimiterChoices.TAB:
dialect = csv.excel_tab
else:
raise forms.ValidationError({
'csv_delimiter': _('Invalid CSV delimiter'),
})
stream = StringIO(data.strip()) stream = StringIO(data.strip())
reader = csv.reader(stream, dialect=dialect) reader = csv.reader(stream, dialect=dialect)

View File

@ -340,7 +340,7 @@ class ImportFormTest(TestCase):
"1,2,3\n" "1,2,3\n"
"4,5,6\n" "4,5,6\n"
) )
self.assertEqual(form._clean_csv(data), [ self.assertEqual(form._clean_csv(data, delimiter=','), [
{'a': '1', 'b': '2', 'c': '3'}, {'a': '1', 'b': '2', 'c': '3'},
{'a': '4', 'b': '5', 'c': '6'}, {'a': '4', 'b': '5', 'c': '6'},
]) ])
@ -350,17 +350,17 @@ class ImportFormTest(TestCase):
"1;2;3\n" "1;2;3\n"
"4;5;6\n" "4;5;6\n"
) )
self.assertEqual(form._clean_csv(data), [ self.assertEqual(form._clean_csv(data, delimiter=';'), [
{'a': '1', 'b': '2', 'c': '3'}, {'a': '1', 'b': '2', 'c': '3'},
{'a': '4', 'b': '5', 'c': '6'}, {'a': '4', 'b': '5', 'c': '6'},
]) ])
data = ( data = (
"a b c\n" "a\tb\tc\n"
"1 2 3\n" "1\t2\t3\n"
"4 5 6\n" "4\t5\t6\n"
) )
self.assertEqual(form._clean_csv(data), [ self.assertEqual(form._clean_csv(data, delimiter='\t'), [
{'a': '1', 'b': '2', 'c': '3'}, {'a': '1', 'b': '2', 'c': '3'},
{'a': '4', 'b': '5', 'c': '6'}, {'a': '4', 'b': '5', 'c': '6'},
]) ])