在 Jupyter 笔记本的 R 单元格(rpy2)中使用 pandas DataFrame 时出错



我想在 Jupyter Notebook 中使用 ggplot2。但是,当我尝试创建 R 魔术单元格(%%R)并传入变量时,出现错误。

这是代码(一个段落表示一个单元格):

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import rpy2
%matplotlib inline
from rpy2.robjects import pandas2ri
pandas2ri.activate()
%load_ext rpy2.ipython
%%R
library(ggplot2)
data = pd.read_csv('train_titanic.csv')
%%R -i data -w 900 -h 480 -u px

运行最后一个单元格时,我得到以下错误(包括回溯):

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py in py2rpy_pandasdataframe(obj)
54         try:
---> 55             od[name] = conversion.py2rpy(values)
56         except Exception as e:
~/anaconda3/envs/catenv/lib/python3.7/functools.py in wrapper(*args, **kw)
839 
--> 840         return dispatch(args[0].__class__)(*args, **kw)
841 
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py in py2rpy_pandasseries(obj)
125             if type(x) is not homogeneous_type:
--> 126                 raise ValueError('Series can only be of one type, or None.')
127         # TODO: Could this be merged with obj.type.name == 'O' case above ?
ValueError: Series can only be of one type, or None.
During handling of the above exception, another exception occurred:
TypeError                                 Traceback (most recent call last)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in from_object(cls, obj)
367         try:
--> 368             mv = memoryview(obj)
369             res = cls.from_memoryview(mv)
TypeError: memoryview: a bytes-like object is required, not 'Series'
During handling of the above exception, another exception occurred:
AttributeError                            Traceback (most recent call last)
<ipython-input-14-75e210679e4a> in <module>
----> 1 get_ipython().run_cell_magic('R', '-i data -w 900 -h 480 -u px', 'nn')
~/anaconda3/envs/catenv/lib/python3.7/site-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
2360             with self.builtin_trap:
2361                 args = (magic_arg_s, cell)
-> 2362                 result = fn(*args, **kwargs)
2363             return result
2364 
</home/morgan/anaconda3/envs/catenv/lib/python3.7/site-packages/decorator.py:decorator-gen-130> in R(self, line, cell, local_ns)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/IPython/core/magic.py in <lambda>(f, *a, **k)
185     # but it's overkill for just that one bit of state.
186     def magic_deco(arg):
--> 187         call = lambda f, *a, **k: f(*a, **k)
188 
189         if callable(arg):
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/ipython/rmagic.py in R(self, line, cell, local_ns)
721                         raise NameError("name '%s' is not defined" % input)
722                 with localconverter(converter) as cv:
--> 723                     ro.r.assign(input, val)
724 
725         tmpd = self.setup_graphics(args)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/functions.py in __call__(self, *args, **kwargs)
190                 kwargs[r_k] = v
191         return (super(SignatureTranslatedFunction, self)
--> 192                 .__call__(*args, **kwargs))
193 
194 
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/functions.py in __call__(self, *args, **kwargs)
111 
112     def __call__(self, *args, **kwargs):
--> 113         new_args = [conversion.py2rpy(a) for a in args]
114         new_kwargs = {}
115         for k, v in kwargs.items():
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/functions.py in <listcomp>(.0)
111 
112     def __call__(self, *args, **kwargs):
--> 113         new_args = [conversion.py2rpy(a) for a in args]
114         new_kwargs = {}
115         for k, v in kwargs.items():
~/anaconda3/envs/catenv/lib/python3.7/functools.py in wrapper(*args, **kw)
838                             '1 positional argument')
839 
--> 840         return dispatch(args[0].__class__)(*args, **kw)
841 
842     funcname = getattr(func, '__name__', 'singledispatch function')
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py in py2rpy_pandasdataframe(obj)
59                           'The error is: %s'
60                           % (name, str(e)))
---> 61             od[name] = StrVector(values)
62 
63     return DataFrame(od)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/vectors.py in __init__(self, obj)
382 
383     def __init__(self, obj):
--> 384         super().__init__(obj)
385         self._add_rops()
386 
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in __init__(self, obj)
286             super().__init__(obj)
287         elif isinstance(obj, collections.abc.Sized):
--> 288             super().__init__(type(self).from_object(obj).__sexp__)
289         else:
290             raise TypeError('The constructor must be called '
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in from_object(cls, obj)
370         except (TypeError, ValueError):
371             try:
--> 372                 res = cls.from_iterable(obj)
373             except ValueError:
374                 msg = ('The class methods from_memoryview() and '
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/conversion.py in _(*args, **kwargs)
26 def _cdata_res_to_rinterface(function):
27     def _(*args, **kwargs):
---> 28         cdata = function(*args, **kwargs)
29         # TODO: test cdata is of the expected CType
30         return _cdata_to_rinterface(cdata)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in from_iterable(cls, iterable, populate_func)
317             if populate_func is None:
318                 cls._populate_r_vector(iterable,
--> 319                                        r_vector)
320             else:
321                 populate_func(iterable, r_vector)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in _populate_r_vector(cls, iterable, r_vector)
300                                   r_vector,
301                                   cls._R_SET_VECTOR_ELT,
--> 302                                   cls._CAST_IN)
303 
304     @classmethod
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in _populate_r_vector(iterable, r_vector, set_elt, cast_value)
237 def _populate_r_vector(iterable, r_vector, set_elt, cast_value):
238     for i, v in enumerate(iterable):
--> 239         set_elt(r_vector, i, cast_value(v))
240 
241 
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in _as_charsxp_cdata(x)
430         return x.__sexp__._cdata
431     else:
--> 432         return conversion._str_to_charsxp(x)
433 
434 
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/conversion.py in _str_to_charsxp(val)
118         s = rlib.R_NaString
119     else:
--> 120         cchar = _str_to_cchar(val)
121         s = rlib.Rf_mkCharCE(cchar, _CE_UTF8)
122     return s
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/conversion.py in _str_to_cchar(s, encoding)
97 def _str_to_cchar(s, encoding: str = 'utf-8'):
98     # TODO: use isStrinb and installTrChar
---> 99     b = s.encode(encoding)
100     return ffi.new('char[]', b)
101 
AttributeError: 'float' object has no attribute 'encode'

所以我发现,只要传入我的 pandas DataFrame 对象,R 魔术单元格就根本无法启动。但是,我尝试在单元格内创建 R 向量,发现可以毫无问题地使用 ggplot2 绘制这些向量。

我正在使用 Python 3.7.6、rpy2 3.1.0、jupyter-notebook 6.0.3,并且是在适用于 Linux 的 Windows 子系统(WSL)上运行 Ubuntu 18.04.2 LTS。

问题很可能是某一列(或多列)包含多种类型——因此无法将数据传输到 R 向量(R 向量只能容纳一种数据类型)。回溯信息可能让人不知所措,但相关的部分是:

ValueError: Series can only be of one type, or None.

这是哪一列?如果不查看您加载的数据集,很难说,但我的一般解决方案是检查列中的类型:

types = data.applymap(type).apply(set)
types[types.apply(len) > 1]

上面代码段返回的任何内容都将是候选罪魁祸首。有许多不同的方法来处理这个问题,具体取决于数据的确切性质。我经常使用的解决方法包括:

  • 调用 data = data.infer_objects()——如果 pandas 没有跟上 dtype 的变化、仍在使用(次优的)Python 对象存储数据,这会有所帮助
  • 如果字符串列中有缺失值,则用空字符串或字符串常量填充 NaN(例如 str_columns = str_columns.fillna(''))
  • 如果您有 datetime 对象但 dtype 是 object,请使用 dates.apply(pd.to_datetime, axis=1)
  • 如果混合了date对象和datetime对象,请使用df.applymap(lambda x: datetime.combine(x, datetime.min.time()) if not isinstance(x, datetime) else x)

在某些罕见的情况下(在某些操作之后),pandas 存储数据的方式与 rpy2 的预期不同;此时将数据帧写入 csv 文件并再次从磁盘读取会有所帮助——但这可能不是您在这里遇到的情况,因为您是从新读取的数据帧开始的。

我只是注意到这个问题可能有一个更简单的原因。出于某种原因,pandas2ri要求您在导入后调用pandas2ri.activate()。这为我解决了问题。

最新更新