我有一个 User 的列表,我想根据 id 属性从中返回具有指定 id 的那一个。
from pydantic import BaseModel,ValidationError
import json
class User(BaseModel):
    """A user record validated by pydantic.

    Fields mirror the JSON objects loaded by ``Data``: id, name, age.
    """

    id: int
    name: str
    age: int
class Data:
    """Loads a JSON array of users and provides lookup by id."""

    def __init__(self, jsonfile: str):
        """Parse *jsonfile* into ``User`` models.

        Records that fail pydantic validation are skipped and the
        validation error is printed (best-effort load, as in the original).
        """
        self.users_list = []
        with open(jsonfile) as f:
            users = json.load(f)
        for u in users:
            try:
                self.users_list.append(User(**u))
            except ValidationError as e:
                print(e.json())

    def get_user(self, id: int) -> User:
        """Return the User whose ``id`` attribute matches, or None.

        ``list.index(id)`` cannot be used here: it compares the list
        *elements* (User instances) against the int and would raise
        ValueError (or return a position, not a User).  Search by
        attribute instead.
        """
        return next((u for u in self.users_list if u.id == id), None)
我可以遍历整个 User 列表并逐个检查它们的 id 属性,但我想知道是否有比下面这种写法更快的方法:
# Linear scan: return the first User whose id matches (O(n) per lookup).
for u in self.users_list :
    if u.id == id :
        return u
如果你想要最快的搜索时间,我建议使用字典,键是用户的id。
我做了一个python测试文件来测试python中不同的数据结构,以找到最快的解决方案。
以下是我的研究结果,以毫秒为单位:
List Append Time: 0.8620707350000001
Dict Append Time: 1.2875515819999999
Set Append Time: 1.325187916
List Find User Time: 0.0658235330000001
Dict Find User Time: 2.4221999999962662e-05
Set Find User Time: 0.10515519500000003
从我的测试中可以看出,字典比列表或集合快很多。
import time
class Counter:
    """Simple stopwatch built on ``time.perf_counter``."""

    # Declared as an annotation.  The original wrote ``_start = float``,
    # which assigns the *type object* float as a class attribute rather
    # than declaring the attribute's type.
    _start: float

    def __init__(self):
        self.start_time()

    def start_time(self):
        """Reset the reference point to now and return it (seconds)."""
        self._start = time.perf_counter()
        return self._start

    @property
    def time_elapsed(self):
        """Seconds since the last reset; resets the clock as a side effect."""
        elapsed = time.perf_counter() - self._start
        self.start_time()
        return elapsed
class User:
    """Minimal user wrapper for the benchmark; compares equal to its id."""

    id: int

    def __init__(self, user_id):
        self.id = user_id

    def __eq__(self, other):
        # Equal to anything that equals its id (including a bare int).
        return self.id == other

    def __hash__(self):
        # Must be consistent with __eq__: objects that compare equal need
        # equal hashes.  The original ``hash(self.id + id(self))`` mixed in
        # the instance address, so ``500000 in user_set`` could never find
        # the user and every hash-based lookup by id silently failed.
        return hash(self.id)
# Benchmark: build 1M users in a list, a dict (keyed by str(id)) and a set,
# then time finding user 500000 in each container.
user_dict = {}
user_set = set()
users = []
counter = Counter()

# Fill the list.
for i in range(0, 1000000):
    user = User(i)
    users.append(user)
print('List Append Time: ', counter.time_elapsed)

# Fill the dict, keyed by the stringified id.
for i in range(0, 1000000):
    user = User(i)
    user_dict[str(user.id)] = user
print('Dict Append Time: ', counter.time_elapsed)

# Fill the set (uses User.__hash__).
for i in range(0, 1000000):
    user = User(i)
    user_set.add(user)
print('Set Append Time: ', counter.time_elapsed)

# List lookup: linear scan, O(n) on average.
for user in users:
    if user == 500000:
        break
print('List Find User Time: ', counter.time_elapsed)

# Dict lookup: single O(1) hash lookup.
found_user = user_dict['500000']
print('Dict Find User Time: ', counter.time_elapsed)

# Set lookup as written iterates instead of hashing, so it is O(n) too.
# NOTE(review): ``500000 in user_set`` would be the O(1) form — verify
# User.__hash__ is consistent with __eq__ before switching to it.
for user in user_set:
    if user == 500000:
        break
print('Set Find User Time: ', counter.time_elapsed)