我正在尝试使用 ctypes 从 Python (3) 程序调用 iconv(3)
。iconv
的 C 类型签名是
size_t iconv(iconv_t cd,
char **inptr, size_t *inbytesleft,
char **outptr, size_t *outbytesleft);
你应该这样调用它:
char *inp = "abcdef";
char outbuf[16];
char *outp = outbuf;
size_t ibytes = strlen(inbuf);
size_t obytes = sizeof outbuf;
size_t rv = iconv(cd, &inp, &ibytes, &outp, &obytes);
显然,它会写入outbuf
,并且它还会修改所有四个变量inp
、outp
、ibytes
和obytes
,以指示它在遇到问题(如果有的话)之前已经转换了多少。它保证不会写入输入字符串,尽管该参数的类型不是const
。
现在,最直接的做法是像这样用ctypes
来表达这个签名:
# Naive one-to-one ctypes mirror of the C prototype of iconv().
# `libc` is not defined in this snippet; presumably an already-loaded
# ctypes handle for the C library.
iconv = libc.iconv
iconv.restype = ctypes.c_size_t
iconv.argtypes = [ctypes.c_void_p,  # iconv_t cd
ctypes.POINTER(ctypes.c_char_p),  # char **inptr
ctypes.POINTER(ctypes.c_size_t),  # size_t *inbytesleft
ctypes.POINTER(ctypes.c_char_p),  # char **outptr
ctypes.POINTER(ctypes.c_size_t)]  # size_t *outbytesleft
(iconv_t
是我正在测试的 C 库中void *
的 typedef。)但是当我尝试调用它时,我收到错误:
>>> obuf = ctypes.create_string_buffer(16)
>>> obuflen = ctypes.c_size_t(16)
>>> iconv(utf8_to_utf16,
... ctypes.byref(ctypes.c_char_p(b"abcdef")),
... ctypes.byref(ctypes.c_size_t(6)),
... ctypes.byref(obuf),
... ctypes.byref(obuflen))
ArgumentError: argument 4: <class 'TypeError'>: expected LP_c_char_p
instance instead of pointer to c_char_Array_16
尝试将obuf
显式转换为c_char_p
也不起作用:
>>> optr = ctypes.c_char_p(obuf)
TypeError: bytes or integer address expected instead of c_char_Array_16 instance
它在错误消息中使用的这些类型名称没有出现在手册中,我很困惑。 正确的方法是什么?
(如果你想知道为什么我想这样做而不是使用Python的内置编码转换器,简短的版本是因为Python的转换器不支持与[GNU] iconv相同的编码集,也没有//TRANSLIT
功能。)
下面是我用来查找字符的代码,这些字符经过音译(transliteration)后会被转换为 "。抱歉,它不是很精致,但我希望它仍然对某些人有用。:)
我需要在 Python 2.6 下运行它,所以我试图写得更通用一些。(2.6 有一些陷阱,所以代码在 Python 3 下可能会更好、更简单。)
from __future__ import print_function
import sys
import ctypes
# Handle on the C library by its soname; every foreign function used
# further down is looked up as an attribute of this object.
libc = ctypes.CDLL("libc.so.6")
if sys.version_info[0] > 2:
    def unichr(a):
        """Return the one-character string for code point ``a``.

        Defined only on Python 3, where the ``unichr`` builtin no longer
        exists and ``chr`` does the same job, so the rest of the script
        can call ``unichr`` on either major version.
        """
        return chr(a)
# ctypes spelling of `char **` used for iconv's input-pointer argument.
LP_c_char2 = ctypes.POINTER(ctypes.c_char_p)
# Pointer to a 16-element char array: the same type that
# ctypes.create_string_buffer(16) instantiates, so such a buffer can be
# wrapped directly in this pointer type.
LP_c_char = ctypes.POINTER(ctypes.c_char * 16)
# libc accessor declared to return `int *`; indexing the result with [0]
# reads the integer it points at (used to fetch errno after a failure).
get_errno_loc = libc.__errno_location
get_errno_loc.restype = ctypes.POINTER(ctypes.c_int)
class MyError(OSError):
    """Error raised when a libc call reports failure.

    Args:
        e: Integer errno value captured after the failed call. It stays
            available as ``args[0]`` and is also exposed as ``.errno``.
    """

    def __init__(self, e):
        import os  # local import: only needed for the strerror() lookup

        # OSError fills in .errno/.strerror only when it receives both
        # values. The explicit two-argument super() form is valid on
        # Python 2 and Python 3 alike, so no version branch is required.
        super(MyError, self).__init__(e, os.strerror(e))
def errcheck(ret, func, args):
    """ctypes ``errcheck`` hook that turns a failure return into ``MyError``.

    Args:
        ret: Value returned by the foreign function.
        func: The foreign function object (unused).
        args: The arguments the function was called with (unused).

    Returns:
        ``ret`` unchanged when it is not a failure sentinel.

    Raises:
        MyError: carrying the current errno when ``ret`` is ``-1`` or the
            unsigned equivalent ``(size_t)-1``.
    """
    # (size_t)-1 equals 2**64-1 only where size_t is 64 bits wide; ask
    # ctypes for the platform's value instead of hard-coding it.
    size_t_failure = ctypes.c_size_t(-1).value
    if ret == -1 or ret == size_t_failure:
        raise MyError(get_errno_loc()[0])
    return ret
# Open a conversion descriptor from UTF-8 to ISO 8859-2 with //TRANSLIT.
iconv_open = libc.iconv_open
iconv_open.restype = ctypes.c_void_p
# Declare the two `const char *` parameters so plain bytes can be passed.
iconv_open.argtypes = [ctypes.c_char_p, ctypes.c_char_p]
ret = iconv_open(b"ISO8859-2//TRANSLIT", b"UTF-8")
# iconv_open signals failure with (iconv_t)-1. With a c_void_p restype
# that arrives as a large positive integer, so test for it explicitly
# rather than handing an invalid descriptor to iconv().
if ret == ctypes.c_void_p(-1).value:
    raise MyError(get_errno_loc()[0])

iconv = libc.iconv
iconv.errcheck = errcheck
iconv.restype = ctypes.c_size_t
iconv.argtypes = [
    ctypes.c_void_p,
    LP_c_char2,
    ctypes.POINTER(ctypes.c_size_t),
    ctypes.POINTER(LP_c_char),
    ctypes.POINTER(ctypes.c_size_t),
]

# One-off smoke test: convert a single U+201C (UTF-8 bytes e2 80 9c).
obuf = ctypes.create_string_buffer(16)
obuflen = ctypes.c_size_t(16)
optr = LP_c_char(obuf)
# The escapes are required: without the backslashes this literal is nine
# ASCII characters rather than the three-byte UTF-8 sequence.
inp = b'\xe2\x80\x9c'
r = iconv(
    ret,
    LP_c_char2(ctypes.c_char_p(inp)),
    ctypes.byref(ctypes.c_size_t(len(inp))),
    ctypes.byref(optr),
    ctypes.byref(obuflen),
)
print(obuf.value)
def func(inp = b"bbb"):
    """Run ``inp`` through the module-level iconv descriptor ``ret``.

    Args:
        inp: Bytes to convert; must be shorter than 16 bytes.

    Returns:
        A tuple ``(obuf, obuflen, r, converted)``: the 16-byte output
        buffer, the ``c_size_t`` holding the output space left after the
        call, iconv's return value, and the bytes actually written.

    Raises:
        AssertionError: if ``inp`` is 16 bytes or longer.
        MyError: raised by the ``errcheck`` hook installed on ``iconv``
            when the call reports failure.
    """
    # Must stay 16: LP_c_char is a pointer to a 16-element char array.
    size = 16
    assert len(inp) < size, "too big input"
    obuf = ctypes.create_string_buffer(size)
    obuflen = ctypes.c_size_t(size)
    optr = LP_c_char(obuf)
    r = iconv(
        ret,
        LP_c_char2(ctypes.c_char_p(inp)),
        ctypes.byref(ctypes.c_size_t(len(inp))),
        ctypes.byref(optr),
        ctypes.byref(obuflen),
    )
    # Slice .raw, not .value: .value stops at the first NUL byte, so a
    # converted NUL would silently vanish from the result. The written
    # length is the buffer size minus the space iconv reports as left.
    return obuf, obuflen, r, obuf.raw[:size - obuflen.value]
# Demo: two U+201C characters (UTF-8 bytes e2 80 9c, twice). The escapes
# matter: without the backslashes the literal is 18 ASCII bytes, which
# func() rejects as too big.
oo, uu, r, vys = func(b'\xe2\x80\x9c\xe2\x80\x9c')
print(oo.raw, uu, r, vys)

# Report every code point whose conversion result is a plain double quote.
# The +1 makes the scan include sys.maxunicode itself.
for i in range(sys.maxunicode + 1):
    try:
        oo, uu, r, vys = func(unichr(i).encode('utf-8'))
        if vys == b'"':
            print(i, unichr(i))
    except UnicodeEncodeError:
        # Character could not be encoded (or printed); skip it.
        pass
    except MyError:
        # iconv refused this character. Skipping is deliberate best-effort:
        # the scan only cares about the characters that do convert. Bind
        # the exception and print it together with i when debugging.
        pass