0

In my Jupyter notebook, I have a CSV file that has the columns year | month | day | hour. I have successfully managed to combine year-month-day into one column and convert it to type datetime (image: enter image description here).

However, when I try to add the hour to the datetime, it seems to fail. Using

df["datetime"] =  df['day'].map(str) + '-' + df['month'].map(str) \
+ '-' + df['year'].map(str) + '-' + df['hour'].map(str)
df["datetime"] = pd.to_datetime(df["datetime"], dayfirst = True, infer_datetime_format=True)

does not seem to work, causing this error traceback:

TypeError                                 Traceback (most recent call last)
File D:\Applications\annaconda3\lib\site-packages\pandas\core\arrays\datetimes.py:2211, in objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object, allow_mixed)
   2210 try:
-> 2211     values, tz_parsed = conversion.datetime_to_datetime64(data.ravel("K"))
   2212     # If tzaware, these values represent unix timestamps, so we
   2213     #  return them as i8 to distinguish from wall times

File D:\Applications\annaconda3\lib\site-packages\pandas\_libs\tslibs\conversion.pyx:360, in pandas._libs.tslibs.conversion.datetime_to_datetime64()

TypeError: Unrecognized value type: <class 'str'>

During handling of the above exception, another exception occurred:

ParserError                               Traceback (most recent call last)
Input In [10], in <cell line: 6>()
      1 # Clean and preprocess the data, removing any missing values and converting the dates into a more suitable form.
      2 
      3 # -> convert the dates into a more suitable form 
      4 df["datetime"] =  df['day'].map(str) + '-' + df['month'].map(str) \
      5 + '-' + df['year'].map(str) + '-' + df['hour'].map(str)
----> 6 df["datetime"] = pd.to_datetime(df["datetime"], dayfirst = True, infer_datetime_format=True)
      8 # -> remove missing values
      9 df.dropna(how = "any", inplace=True)

File D:\Applications\annaconda3\lib\site-packages\pandas\core\tools\datetimes.py:1051, in to_datetime(arg, errors, dayfirst, yearfirst, utc, format, exact, unit, infer_datetime_format, origin, cache)
   1049         result = arg.map(cache_array)
   1050     else:
-> 1051         values = convert_listlike(arg._values, format)
   1052         result = arg._constructor(values, index=arg.index, name=arg.name)
   1053 elif isinstance(arg, (ABCDataFrame, abc.MutableMapping)):

File D:\Applications\annaconda3\lib\site-packages\pandas\core\tools\datetimes.py:402, in _convert_listlike_datetimes(arg, format, name, tz, unit, errors, infer_datetime_format, dayfirst, yearfirst, exact)
    400 assert format is None or infer_datetime_format
    401 utc = tz == "utc"
--> 402 result, tz_parsed = objects_to_datetime64ns(
    403     arg,
    404     dayfirst=dayfirst,
    405     yearfirst=yearfirst,
    406     utc=utc,
    407     errors=errors,
    408     require_iso8601=require_iso8601,
    409     allow_object=True,
    410 )
    412 if tz_parsed is not None:
    413     # We can take a shortcut since the datetime64 numpy array
    414     # is in UTC
    415     dta = DatetimeArray(result, dtype=tz_to_dtype(tz_parsed))

File D:\Applications\annaconda3\lib\site-packages\pandas\core\arrays\datetimes.py:2217, in objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object, allow_mixed)
   2215         return values.view("i8"), tz_parsed
   2216     except (ValueError, TypeError):
-> 2217         raise err
   2219 if tz_parsed is not None:
   2220     # We can take a shortcut since the datetime64 numpy array
   2221     #  is in UTC
   2222     # Return i8 values to denote unix timestamps
   2223     return result.view("i8"), tz_parsed

File D:\Applications\annaconda3\lib\site-packages\pandas\core\arrays\datetimes.py:2199, in objects_to_datetime64ns(data, dayfirst, yearfirst, utc, errors, require_iso8601, allow_object, allow_mixed)
   2197 order: Literal["F", "C"] = "F" if flags.f_contiguous else "C"
   2198 try:
-> 2199     result, tz_parsed = tslib.array_to_datetime(
   2200         data.ravel("K"),
   2201         errors=errors,
   2202         utc=utc,
   2203         dayfirst=dayfirst,
   2204         yearfirst=yearfirst,
   2205         require_iso8601=require_iso8601,
   2206         allow_mixed=allow_mixed,
   2207     )
   2208     result = result.reshape(data.shape, order=order)
   2209 except ValueError as err:

File D:\Applications\annaconda3\lib\site-packages\pandas\_libs\tslib.pyx:381, in pandas._libs.tslib.array_to_datetime()

File D:\Applications\annaconda3\lib\site-packages\pandas\_libs\tslib.pyx:613, in pandas._libs.tslib.array_to_datetime()

File D:\Applications\annaconda3\lib\site-packages\pandas\_libs\tslib.pyx:751, in pandas._libs.tslib._array_to_datetime_object()

File D:\Applications\annaconda3\lib\site-packages\pandas\_libs\tslib.pyx:742, in pandas._libs.tslib._array_to_datetime_object()

File D:\Applications\annaconda3\lib\site-packages\pandas\_libs\tslibs\parsing.pyx:281, in pandas._libs.tslibs.parsing.parse_datetime_string()

File D:\Applications\annaconda3\lib\site-packages\dateutil\parser\_parser.py:1368, in parse(timestr, parserinfo, **kwargs)
   1366     return parser(parserinfo).parse(timestr, **kwargs)
   1367 else:
-> 1368     return DEFAULTPARSER.parse(timestr, **kwargs)

File D:\Applications\annaconda3\lib\site-packages\dateutil\parser\_parser.py:643, in parser.parse(self, timestr, default, ignoretz, tzinfos, **kwargs)
    640 res, skipped_tokens = self._parse(timestr, **kwargs)
    642 if res is None:
--> 643     raise ParserError("Unknown string format: %s", timestr)
    645 if len(res) == 0:
    646     raise ParserError("String does not contain a date: %s", timestr)

ParserError: Unknown string format: 1-1-2014-0

How do I correctly combine the columns so that my datetime column shows YYYY-MM-DD-HH:00? (Note that the 00 is arbitrary, as we have only measured hours.)

Avinash
  • 852
  • 5
  • 18
Xray25
  • 89
  • 10

0 Answers0