Closed
Description
This works as expected:
>>> s = pd.Series(['Name', 'email|Name|address', 'address|email'])
>>> s.str.get_dummies(sep='|')
Name address email
0 1 0 0
1 1 1 1
2 0 1 1
However, if any of the categorical variables is named exactly 'name', then there's a problem:
>>> s = pd.Series(['name', 'email|Name|address', 'address|email'])
>>> s.str.get_dummies(sep='|')
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-5-1a39a6dcd56b> in <module>()
----> 1 s.str.get_dummies(sep='|')
/Users/dgrady/anaconda/envs/python3/lib/python3.5/site-packages/pandas/core/strings.py in get_dummies(self, sep)
1377 data = self._orig.astype(str) if self._is_categorical else self._data
1378 result = str_get_dummies(data, sep)
-> 1379 return self._wrap_result(result, use_codes=(not self._is_categorical))
1380
1381 @copy(str_translate)
/Users/dgrady/anaconda/envs/python3/lib/python3.5/site-packages/pandas/core/strings.py in _wrap_result(self, result, use_codes, name)
1100 if not hasattr(result, 'ndim'):
1101 return result
-> 1102 name = name or getattr(result, 'name', None) or self._orig.name
1103
1104 if result.ndim == 1:
/Users/dgrady/anaconda/envs/python3/lib/python3.5/site-packages/pandas/core/generic.py in __nonzero__(self)
729 raise ValueError("The truth value of a {0} is ambiguous. "
730 "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
--> 731 .format(self.__class__.__name__))
732
733 __bool__ = __nonzero__
ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
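This is presumably related to the pandas/core/strings.py code in the stack trace: in `_wrap_result`, `getattr(result, 'name', None)` appears to return the dummy column labelled 'name' (a Series) rather than a name attribute, and evaluating that Series for truth in the `or` chain raises the ValueError.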
>>> pd.show_versions()
INSTALLED VERSIONS
------------------
commit: None
python: 3.5.1.final.0
python-bits: 64
OS: Darwin
OS-release: 14.5.0
machine: x86_64
processor: i386
byteorder: little
LC_ALL: None
LANG: en_US.UTF-8
pandas: 0.17.1
nose: 1.3.7
pip: 7.1.2
setuptools: 18.5
Cython: 0.23.4
numpy: 1.10.1
scipy: 0.16.0
statsmodels: 0.6.1
IPython: 4.0.1
sphinx: 1.3.1
patsy: 0.4.0
dateutil: 2.4.2
pytz: 2015.7
blosc: None
bottleneck: 1.0.0
tables: 3.2.2
numexpr: 2.4.4
matplotlib: 1.5.0
openpyxl: 2.2.6
xlrd: 0.9.4
xlwt: 1.0.0
xlsxwriter: 0.7.7
lxml: 3.4.4
bs4: 4.4.1
html5lib: None
httplib2: None
apiclient: None
sqlalchemy: 1.0.9
pymysql: None
psycopg2: None
Jinja2: 2.8