In my Jupyter notebook, I have a CSV file that has the columns year | month | day | hour. I have successfully managed to combine year-month-day into one column and convert it to type datetime;
however, when I try to add the hour to the datetime, the conversion fails. Using
df["datetime"] = df['day'].map(str) + '-' + df['month'].map(str) \
+ '-' + df['year'].map(str) + '-' + df['hour'].map(str)
df["datetime"] = pd.to_datetime(df["datetime"], dayfirst = True, infer_datetime_format=True)
does not work and produces this error traceback:
TypeError Traceback (most recent call last)
File D:\Applications\annaconda3\lib\site-packages\pandas\core\arrays\datetimes.py:2211, in objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object, allow_mixed)
2210 try:
-> 2211 values, tz_parsed = conversion.datetime_to_datetime64(data.ravel("K"))
2212 # If tzaware, these values represent unix timestamps, so we
2213 # return them as i8 to distinguish from wall times
File D:\Applications\annaconda3\lib\site-packages\pandas\_libs\tslibs\conversion.pyx:360, in pandas._libs.tslibs.conversion.datetime_to_datetime64()
TypeError: Unrecognized value type: <class 'str'>
During handling of the above exception, another exception occurred:
ParserError Traceback (most recent call last)
Input In [10], in <cell line: 6>()
1 # Clean and preprocess the data, removing any missing values and converting the dates into a more suitable form.
2
3 # -> convert the dates into a more suitable form
4 df["datetime"] = df['day'].map(str) + '-' + df['month'].map(str) \
5 + '-' + df['year'].map(str) + '-' + df['hour'].map(str)
----> 6 df["datetime"] = pd.to_datetime(df["datetime"], dayfirst = True, infer_datetime_format=True)
8 # -> remove missing values
9 df.dropna(how = "any", inplace=True)
File D:\Applications\annaconda3\lib\site-packages\pandas\core\tools\datetimes.py:1051, in to_datetime(arg, errors, dayfirst, yearfirst, utc, format, exact, unit, infer_datetime_format, origin, cache)
1049 result = arg.map(cache_array)
1050 else:
-> 1051 values = convert_listlike(arg._values, format)
1052 result = arg._constructor(values, index=arg.index, name=arg.name)
1053 elif isinstance(arg, (ABCDataFrame, abc.MutableMapping)):
File D:\Applications\annaconda3\lib\site-packages\pandas\core\tools\datetimes.py:402, in _convert_listlike_datetimes(arg, format, name, tz, unit, errors, infer_datetime_format, dayfirst, yearfirst, exact)
400 assert format is None or infer_datetime_format
401 utc = tz == "utc"
--> 402 result, tz_parsed = objects_to_datetime64ns(
403 arg,
404 dayfirst=dayfirst,
405 yearfirst=yearfirst,
406 utc=utc,
407 errors=errors,
408 require_iso8601=require_iso8601,
409 allow_object=True,
410 )
412 if tz_parsed is not None:
413 # We can take a shortcut since the datetime64 numpy array
414 # is in UTC
415 dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed))
File D:\Applications\annaconda3\lib\site-packages\pandas\core\arrays\datetimes.py:2217, in objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object, allow_mixed)
2215 return values.view("i8"), tz_parsed
2216 except (ValueError, TypeError):
-> 2217 raise err
2219 if tz_parsed is not None:
2220 # We can take a shortcut since the datetime64 numpy array
2221 # is in UTC
2222 # Return i8 values to denote unix timestamps
2223 return result.view("i8"), tz_parsed
File D:\Applications\annaconda3\lib\site-packages\pandas\core\arrays\datetimes.py:2199, in objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object, allow_mixed)
2197 order: Literal["F", "C"] = "F" if flags.f_contiguous else "C"
2198 try:
-> 2199 result, tz_parsed = tslib.array_to_datetime(
2200 data.ravel("K"),
2201 errors=errors,
2202 utc=utc,
2203 dayfirst=dayfirst,
2204 yearfirst=yearfirst,
2205 require_iso8601=require_iso8601,
2206 allow_mixed=allow_mixed,
2207 )
2208 result = result.reshape(data.shape, order=order)
2209 except ValueError as err:
File D:\Applications\annaconda3\lib\site-packages\pandas\_libs\tslib.pyx:381, in pandas._libs.tslib.array_to_datetime()
File D:\Applications\annaconda3\lib\site-packages\pandas\_libs\tslib.pyx:613, in pandas._libs.tslib.array_to_datetime()
File D:\Applications\annaconda3\lib\site-packages\pandas\_libs\tslib.pyx:751, in pandas._libs.tslib._array_to_datetime_object()
File D:\Applications\annaconda3\lib\site-packages\pandas\_libs\tslib.pyx:742, in pandas._libs.tslib._array_to_datetime_object()
File D:\Applications\annaconda3\lib\site-packages\pandas\_libs\tslibs\parsing.pyx:281, in pandas._libs.tslibs.parsing.parse_datetime_string()
File D:\Applications\annaconda3\lib\site-packages\dateutil\parser\_parser.py:1368, in parse(timestr, parserinfo, **kwargs)
1366 return parser(parserinfo).parse(timestr, **kwargs)
1367 else:
-> 1368 return DEFAULTPARSER.parse(timestr, **kwargs)
File D:\Applications\annaconda3\lib\site-packages\dateutil\parser\_parser.py:643, in parser.parse(self, timestr, default, ignoretz, tzinfos, **kwargs)
640 res, skipped_tokens = self._parse(timestr, **kwargs)
642 if res is None:
--> 643 raise ParserError("Unknown string format: %s", timestr)
645 if len(res) == 0:
646 raise ParserError("String does not contain a date: %s", timestr)
ParserError: Unknown string format: 1-1-2014-0
How do I correctly combine the columns so that my datetime column shows YYYY-MM-DD-HH:00? (Note that the 00 is arbitrary, as we have only measured hours.)