Pass delimiter to the _clean_csv method #13239

This commit is contained in:
Abhimanyu Saharan 2023-08-30 19:11:17 +05:30
parent 81024af024
commit 74f3cd7466
2 changed files with 19 additions and 15 deletions

View File

@ -67,7 +67,7 @@ class BulkImportForm(BootstrapMixin, SyncedDataMixin, forms.Form):
# Process data according to the selected format
if format == ImportFormatChoices.CSV:
self.cleaned_data['data'] = self._clean_csv(data)
self.cleaned_data['data'] = self._clean_csv(data, delimiter=self.cleaned_data['csv_delimiter'])
elif format == ImportFormatChoices.JSON:
self.cleaned_data['data'] = self._clean_json(data)
elif format == ImportFormatChoices.YAML:
@ -93,26 +93,30 @@ class BulkImportForm(BootstrapMixin, SyncedDataMixin, forms.Form):
'format': _('Unable to detect data format. Please specify.')
})
def _clean_csv(self, data):
def _clean_csv(self, data, delimiter=None):
"""
Clean CSV-formatted data. The first row will be treated as column headers.
"""
if self.cleaned_data['csv_delimiter'] == CSVDelimiterChoices.AUTO:
if delimiter == CSVDelimiterChoices.AUTO:
# Determine the CSV dialect
try:
# This uses a rough heuristic to detect the CSV dialect. If the data is malformed, we'll fall back to
# the default Excel dialect.
# the default Excel dialect. Note that delimiter can only be one character.
dialect = csv.Sniffer().sniff(
data.strip(), delimiters=''.join(
[CSVDelimiterChoices.COMMA, CSVDelimiterChoices.SEMICOLON, CSVDelimiterChoices.TAB]
)
data.strip(), delimiters=''.join([CSVDelimiterChoices.COMMA, CSVDelimiterChoices.SEMICOLON])
)
except csv.Error:
dialect = csv.excel
else:
elif delimiter in [CSVDelimiterChoices.COMMA, CSVDelimiterChoices.SEMICOLON]:
dialect = csv.excel
dialect.delimiter = self.cleaned_data['csv_delimiter']
dialect.delimiter = delimiter
elif delimiter == CSVDelimiterChoices.TAB:
dialect = csv.excel_tab
else:
raise forms.ValidationError({
'csv_delimiter': _('Invalid CSV delimiter'),
})
stream = StringIO(data.strip())
reader = csv.reader(stream, dialect=dialect)

View File

@ -340,7 +340,7 @@ class ImportFormTest(TestCase):
"1,2,3\n"
"4,5,6\n"
)
self.assertEqual(form._clean_csv(data), [
self.assertEqual(form._clean_csv(data, delimiter=','), [
{'a': '1', 'b': '2', 'c': '3'},
{'a': '4', 'b': '5', 'c': '6'},
])
@ -350,17 +350,17 @@ class ImportFormTest(TestCase):
"1;2;3\n"
"4;5;6\n"
)
self.assertEqual(form._clean_csv(data), [
self.assertEqual(form._clean_csv(data, delimiter=';'), [
{'a': '1', 'b': '2', 'c': '3'},
{'a': '4', 'b': '5', 'c': '6'},
])
data = (
"a b c\n"
"1 2 3\n"
"4 5 6\n"
"a\tb\tc\n"
"1\t2\t3\n"
"4\t5\t6\n"
)
self.assertEqual(form._clean_csv(data), [
self.assertEqual(form._clean_csv(data, delimiter='\t'), [
{'a': '1', 'b': '2', 'c': '3'},
{'a': '4', 'b': '5', 'c': '6'},
])