Skip to content

[FEA] Support groupby aggregations on list of series #1096

@mrocklin

Description

@mrocklin
import cudf
df = cudf.DataFrame({'x': [1, 2, 3], 'y': [1, 2, 1]})
df.groupby([df.x]).y.sum()

Traceback

Details
TypeError                                 Traceback (most recent call last)
<ipython-input-1-6404360106bb> in <module>
      1 import cudf
      2 df = cudf.DataFrame({'x': [1, 2, 3], 'y': [1, 2, 1]})
----> 3 df.groupby([df.x]).y.sum()

~/cudf/python/cudf/dataframe/dataframe.py in groupby(self, by, sort, as_index, method, level)
   1807             # __apply_agg
   1808             result = Groupby(self, by=by, method=method, as_index=as_index,
-> 1809                              level=level)
   1810             return result
   1811

~/cudf/python/cudf/groupby/groupby.py in __init__(self, df, by, method, as_index, level)
    107         else:
    108             self._by = [by] if isinstance(by, (str, Number)) else list(by)
--> 109         self._val_columns = [idx for idx in self._df.columns
    110                              if idx not in self._by]
    111         self._as_index = as_index

~/cudf/python/cudf/groupby/groupby.py in <listcomp>(.0)
    108             self._by = [by] if isinstance(by, (str, Number)) else list(by)
    109         self._val_columns = [idx for idx in self._df.columns
--> 110                              if idx not in self._by]
    111         self._as_index = as_index
    112         if (method == "hash"):

~/cudf/python/cudf/dataframe/series.py in __eq__(self, other)
    467
    468     def __eq__(self, other):
--> 469         return self._unordered_compare(other, 'eq')
    470
    471     def __ne__(self, other):

~/cudf/python/cudf/dataframe/series.py in _unordered_compare(self, other, cmpops)
    450     def _unordered_compare(self, other, cmpops):
    451         nvtx_range_push("CUDF_UNORDERED_COMP", "orange")
--> 452         other = self._normalize_binop_value(other)
    453         outcol = self._column.unordered_compare(cmpops, other._column)
    454         result = self._copy_construct(data=outcol)

~/cudf/python/cudf/dataframe/series.py in _normalize_binop_value(self, other)
    445             return Series(other)
    446         else:
--> 447             col = self._column.normalize_binop_value(other)
    448             return self._copy_construct(data=col)
    449

~/cudf/python/cudf/dataframe/numerical.py in normalize_binop_value(self, other)
    122             return self.replace(data=Buffer(ary), dtype=ary.dtype)
    123         else:
--> 124             raise TypeError('cannot broadcast {}'.format(type(other)))
    125
    126     def astype(self, dtype):

TypeError: cannot broadcast <class 'str'>

Metadata

Metadata

Assignees

Labels

Python (Affects Python cuDF API.) · dask (Dask issue) · feature request (New feature or request)

Type

No type
No fields configured for issues without a type.

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions