我正在编写一个用于缓存的小型库。Python dict不适合我,已经尝试了std::map,得到了带有类似错误的SIGSEGV。总之,整个要点在下面的日志中进行了描述。我做错了什么?有其他方法可以在C中存储对象吗?
问题:
(gdb) run
The program being debugged has been started already.
Start it from the beginning? (y or n) y
Starting program: /usr/bin/python3
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/usr/lib/libthread_db.so.1".
Python 3.9.2 (default, Feb 20 2021, 18:40:11)
[GCC 10.2.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> from syncached import cache
>>> cache.push(1, object())
>>> cache.get(1) == object()
True
>>> cache.get(1) == object()
Program received signal SIGSEGV, Segmentation fault.
0x00007ffff7d049d0 in PyMem_Calloc () from /usr/lib/libpython3.9.so.1.0
(gdb) bt
#0 0x00007ffff7d049d0 in PyMem_Calloc () from /usr/lib/libpython3.9.so.1.0
#1 0x00007ffff7cfb27d in PyList_New () from /usr/lib/libpython3.9.so.1.0
#2 0x00007ffff7d6f4e3 in ?? () from /usr/lib/libpython3.9.so.1.0
#3 0x00007ffff7de2e37 in PyAST_CompileObject () from /usr/lib/libpython3.9.so.1.0
#4 0x00007ffff7de2c3b in ?? () from /usr/lib/libpython3.9.so.1.0
#5 0x00007ffff7cf68ab in ?? () from /usr/lib/libpython3.9.so.1.0
#6 0x00007ffff7cf6a63 in PyRun_InteractiveLoopFlags () from /usr/lib/libpython3.9.so.1.0
#7 0x00007ffff7c84f6b in PyRun_AnyFileExFlags () from /usr/lib/libpython3.9.so.1.0
#8 0x00007ffff7c7965c in ?? () from /usr/lib/libpython3.9.so.1.0
#9 0x00007ffff7dc9fa9 in Py_BytesMain () from /usr/lib/libpython3.9.so.1.0
#10 0x00007ffff7a46b25 in __libc_start_main () from /usr/lib/libc.so.6
#11 0x000055555555504e in _start ()
pyhashmap.c:
#include "Python.h"
#include <stdlib.h>
typedef struct {
Py_hash_t key;
PyObject *val;
} hashmap_member;
typedef struct {
size_t cache_size;
size_t currsize;
hashmap_member *list;
} pyhashmap;
pyhashmap *new_map(size_t size){
pyhashmap *map = PyMem_Malloc(sizeof(pyhashmap));
map->cache_size = size;
map->currsize = 0;
map->list = PyMem_Malloc(size*sizeof(hashmap_member));
return map;
}
void map_insert(pyhashmap *map, Py_hash_t key, PyObject *val){
if (map->currsize == map->cache_size){
return;
}
for (size_t i = 0; i < map->currsize; i++){
if (map->list[i].key == key){
return;
}
}
map->list[map->currsize] = (hashmap_member) {.key = key, .val = val};
map->currsize++;
}
PyObject *map_get(pyhashmap *map, Py_hash_t key){
for (size_t i = 0; i < map->currsize; i++){
if (map->list[i].key == key){
return map->list[i].val;
}
}
return Py_None;
}
ipyhashmap.pxd:
cdef extern from "pyhashmap.c":
ctypedef struct pyhashmap
pyhashmap *new_map(size_t)
void map_insert(pyhashmap *, int, object)
object map_get(pyhashmap *, int)
cache.pyx:
from syncached.ipyhashmap cimport pyhashmap, new_map, map_insert, map_get
cdef pyhashmap *map = new_map(5)
cpdef push(int key, object val):
map_insert(map, key, val)
cpdef get(key):
return map_get(map, key)
另外,第二个问题:
>>> cache.push(3, {"a": "B"})
>>> cache.get(3)
{3: 3, ((<NULL>, 'get'), ('cache', 'get')): ((((((...), ()), None), (3, None)), 'get'), ('cache', 'get')), ((((((...), None), None), ((((...), 'get'), ((...), 'get')), None)), 'get'), ()): ((((((...), 'get'), None), (((...), 'get'), None)), 'get'), ()), ((((...), 'get'), None), (((...), 'get'), None)): ((((...), 'get'), None), (((...), 'get'), None)), 'Py_Repr': [{...}, [...]]}
>>> cache.get(3)
KeyError: 'unknown symbol table entry'
>>> cache.get(3)
[1] 21720 segmentation fault (core dumped) python3
我可以推荐的最好方法是,通过将存储的python对象存储在python列表、dict或set中,来防止它们在自定义映射中被垃圾收集。这将确保在存储对象时引用计数不会降至零。
选项2:手动管理存储的python对象引用计数
在处理指向Python对象(PyObject*(的指针时,可以尝试手动管理引用计数。如果增加引用计数,但不减少相同的次数,则对象在不再使用时将永远不会从内存中删除。应用程序无法回收已占用的内存,即内存将泄漏。但是,如果您不增加引用计数,当您还在C代码中引用该对象时,该对象可能会被删除。
您可以使用Python的Reference Counting API中的Py_INCREF和Py_DECREF,尝试在C中直接管理内存(请参阅这里类似问题的答案(。如果允许您使用C++而不是C,那么RAII可以简化参考计数管理。