1. error
2. solution
1. error
df = pd.read_csv("data_test.csv")
- error message
더보기
{
{
"name": "UnicodeDecodeError",
"message": "'utf-8' codec can't decode bytes in position 0-1: unexpected end of data",
"stack": "---------------------------------------------------------------------------
UnicodeDecodeError Traceback (most recent call last)
Cell In[4], line 1
----> 1 df = pd.read_csv(\"./procedure2/procedure_CHAMC.csv\")
File /opt/conda/lib/python3.11/site-packages/pandas/util/_decorators.py:211, in deprecate_kwarg.<locals>._deprecate_kwarg.<locals>.wrapper(*args, **kwargs)
209 else:
210 kwargs[new_arg_name] = new_arg_value
--> 211 return func(*args, **kwargs)
File /opt/conda/lib/python3.11/site-packages/pandas/util/_decorators.py:331, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
325 if len(args) > num_allow_args:
326 warnings.warn(
327 msg.format(arguments=_format_argument_list(allow_args)),
328 FutureWarning,
329 stacklevel=find_stack_level(),
330 )
--> 331 return func(*args, **kwargs)
File /opt/conda/lib/python3.11/site-packages/pandas/io/parsers/readers.py:950, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, error_bad_lines, warn_bad_lines, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options)
935 kwds_defaults = _refine_defaults_read(
936 dialect,
937 delimiter,
(...)
946 defaults={\"delimiter\": \",\"},
947 )
948 kwds.update(kwds_defaults)
--> 950 return _read(filepath_or_buffer, kwds)
File /opt/conda/lib/python3.11/site-packages/pandas/io/parsers/readers.py:605, in _read(filepath_or_buffer, kwds)
602 _validate_names(kwds.get(\"names\", None))
604 # Create the parser.
--> 605 parser = TextFileReader(filepath_or_buffer, **kwds)
607 if chunksize or iterator:
608 return parser
File /opt/conda/lib/python3.11/site-packages/pandas/io/parsers/readers.py:1442, in TextFileReader.__init__(self, f, engine, **kwds)
1439 self.options[\"has_index_names\"] = kwds[\"has_index_names\"]
1441 self.handles: IOHandles | None = None
-> 1442 self._engine = self._make_engine(f, self.engine)
File /opt/conda/lib/python3.11/site-packages/pandas/io/parsers/readers.py:1753, in TextFileReader._make_engine(self, f, engine)
1750 raise ValueError(msg)
1752 try:
-> 1753 return mapping[engine](f, **self.options)
1754 except Exception:
1755 if self.handles is not None:
File /opt/conda/lib/python3.11/site-packages/pandas/io/parsers/c_parser_wrapper.py:79, in CParserWrapper.__init__(self, src, **kwds)
76 kwds.pop(key, None)
78 kwds[\"dtype\"] = ensure_dtype_objs(kwds.get(\"dtype\", None))
---> 79 self._reader = parsers.TextReader(src, **kwds)
81 self.unnamed_cols = self._reader.unnamed_cols
83 # error: Cannot determine type of 'names'
File /opt/conda/lib/python3.11/site-packages/pandas/_libs/parsers.pyx:547, in pandas._libs.parsers.TextReader.__cinit__()
File /opt/conda/lib/python3.11/site-packages/pandas/_libs/parsers.pyx:636, in pandas._libs.parsers.TextReader._get_header()
File /opt/conda/lib/python3.11/site-packages/pandas/_libs/parsers.pyx:852, in pandas._libs.parsers.TextReader._tokenize_rows()
File /opt/conda/lib/python3.11/site-packages/pandas/_libs/parsers.pyx:1965, in pandas._libs.parsers.raise_parser_error()
UnicodeDecodeError: 'utf-8' codec can't decode bytes in position 0-1: unexpected end of data"
}
2. solution
- encoding issue
>> encoding_errors='ignore'
df = pd.read_csv("data_test.csv", encoding_errors='ignore')
>> encoding format을 찾고 encoding='cp949' 옵션 넣어 해결