加快从 DICOM 结构集中提取坐标的速度



使用 numpy.reshape 帮助很大,改用 map 也有很大帮助。还有可能进一步提速吗?

import pydicom
import numpy as np
import cProfile
import pstats

def parse_coords(contour):
    """Convert one ROIContourSequence item into per-loop coordinate arrays.

    Args:
        contour: An item of a DICOM ROIContourSequence. It may lack a
            ``ContourSequence`` attribute, in which case the structure is
            treated as empty.

    Returns:
        A list with one array per contour loop. Each array has shape
        ``(3, n_points)``: row 0 holds x0, x1, ..., row 1 holds y0, y1, ...,
        and row 2 holds z0, z1, .... An empty list is returned when the
        contour has no ``ContourSequence``.
    """
    if not hasattr(contour, "ContourSequence"):
        return []  # empty structure

    # ContourData is a flat x0, y0, z0, x1, y1, z1, ... sequence; a
    # column-major (Fortran-order) reshape splits it into x/y/z rows.
    return [
        np.array(loop.ContourData).reshape(
            (3, len(loop.ContourData) // 3), order='F')
        for loop in contour.ContourSequence
    ]

def profile_load_contours():
    """Read 'RS.gyn1.dcm' and parse the coordinates of every ROI contour.

    This is the workload being timed; the parsed result is built and then
    discarded, and nothing is returned.
    """
    dataset = pydicom.dcmread('RS.gyn1.dcm')
    # Materialise the full list so every contour is actually parsed.
    _parsed = list(map(parse_coords, dataset.ROIContourSequence))

# Profile one load-and-parse pass and save the raw statistics to 'prof.stats'.
cProfile.run('profile_load_contours()', 'prof.stats')
# Reload the saved statistics and print the top 30 entries by cumulative time.
p = pstats.Stats('prof.stats')
p.sort_stats('cumulative')
p.print_stats(30)

使用从 Varian Eclipse 治疗计划系统导出的真实结构集进行测试。

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000   12.165   12.165 {built-in method builtins.exec}
        1    0.151    0.151   12.165   12.165 <string>:1(<module>)
        1    0.000    0.000   12.014   12.014 load_contour_time.py:19(profile_load_contours)
        1    0.000    0.000   11.983   11.983 load_contour_time.py:21(<listcomp>)
       56    0.009    0.000   11.983    0.214 load_contour_time.py:7(parse_coords)
50745/33837    0.129    0.000   11.422    0.000 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/dataset.py:455(__getattr__)
50741/33825    0.152    0.000   10.938    0.000 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/dataset.py:496(__getitem__)
    16864    0.069    0.000    9.839    0.001 load_contour_time.py:12(_reshape_contour_data)
    16915    0.101    0.000    9.780    0.001 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/dataelem.py:439(DataElement_from_raw)
    16915    0.052    0.000    9.300    0.001 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/values.py:320(convert_value)
    16864    0.038    0.000    7.099    0.000 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/values.py:89(convert_DS_string)
    16870    0.042    0.000    7.010    0.000 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/valuerep.py:495(MultiString)
    16908    1.013    0.000    6.826    0.000 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/multival.py:29(__init__)
  3004437    3.013    0.000    5.577    0.000 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/multival.py:42(number_string_type_constructor)
3038317/3038231    1.037    0.000    3.171    0.000 {built-in method builtins.hasattr}

大部分时间都花在 convert_DS_string 上。有可能让它更快吗?我想部分原因在于坐标在 DICOM 文件中的存储方式并不高效。

编辑:为了避免 MultiValue.__init__ 末尾的循环,我想获取每个 ContourData 的原始字符串(即尚未转换的、由分隔符连接的浮点数文本),并对其使用 numpy.fromstring。但是,我一直无法拿到这个原始字符串。

绕过 MultiValue.__init__ 中的循环并改用 numpy.fromstring,可获得 4 倍以上的加速。我会把它发布到 pydicom 的 GitHub 上,看看是否有人有兴趣将其纳入库代码。这个方案有点丑,欢迎提出进一步改进的建议。

import pydicom
import numpy as np
import cProfile
import pstats

def parse_coords(contour):
    """Convert one ROIContourSequence item into per-loop coordinate arrays.

    Where the ContourData value is still available as bytes, it is parsed
    with ``numpy.fromstring``; otherwise the regular ``loop.ContourData``
    attribute is used.

    Args:
        contour: An item of a DICOM ROIContourSequence. It may lack a
            ``ContourSequence`` attribute, in which case the structure is
            treated as empty.

    Returns:
        A list with one array per contour loop. Each array has shape
        ``(3, n_points)``: row 0 holds x0, x1, ..., row 1 holds y0, y1, ...,
        and row 2 holds z0, z1, .... An empty list is returned when the
        contour has no ``ContourSequence``.
    """
    if not hasattr(contour, "ContourSequence"):
        return []  # empty structure

    contour_data_tag = pydicom.tag.Tag(0x3006, 0x0050)  # ContourData tag

    def _loop_to_array(loop):
        # Calls the parent class's __getitem__ rather than the loop's own.
        # NOTE(review): presumably this bypasses pydicom's value conversion
        # so the unconverted bytes are returned -- confirm against the
        # installed pydicom version.
        raw_value = super(loop.__class__, loop).__getitem__(
            contour_data_tag).value
        try:
            text = raw_value.decode(encoding='utf-8')
            # The numbers in the text are separated by a backslash, chr(92).
            values = np.fromstring(text, dtype=float, sep="\\")
        except AttributeError:
            # No .decode(): the value is not bytes (e.g. a MultiValue that
            # has already been converted to doubles and cached).
            values = loop.ContourData
        # Column-major reshape splits the flat x, y, z, x, y, z, ... data
        # into separate x, y and z rows.
        return np.array(values).reshape((3, len(values) // 3), order='F')

    return [_loop_to_array(loop) for loop in contour.ContourSequence]

def profile_load_contours():
    """Read 'RS.gyn1.dcm' and parse the coordinates of every ROI contour.

    This is the workload being timed; the parsed result is built and then
    discarded, and nothing is returned.
    """
    dataset = pydicom.dcmread('RS.gyn1.dcm')
    # Materialise the full list so every contour is actually parsed.
    _parsed = list(map(parse_coords, dataset.ROIContourSequence))

# Run the workload once, unprofiled, before measuring.
# NOTE(review): presumably a warm-up so one-time costs such as file-system
# caching do not skew the profile -- confirm the intent.
profile_load_contours()
# Profile a second pass and save the raw statistics to 'prof.stats'.
cProfile.run('profile_load_contours()', 'prof.stats')
# Reload the saved statistics and print the top 15 entries by cumulative time.
p = pstats.Stats('prof.stats')
p.sort_stats('cumulative')
p.print_stats(15)

结果

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    2.800    2.800 {built-in method builtins.exec}
        1    0.017    0.017    2.800    2.800 <string>:1(<module>)
        1    0.000    0.000    2.783    2.783 load_contour_time3.py:29(profile_load_contours)
        1    0.000    0.000    2.761    2.761 load_contour_time3.py:31(<listcomp>)
       56    0.006    0.000    2.760    0.049 load_contour_time3.py:9(parse_coords)
  153/109    0.001    0.000    2.184    0.020 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/dataset.py:455(__getattr__)
   149/97    0.001    0.000    2.182    0.022 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/dataset.py:496(__getitem__)
       51    0.000    0.000    2.178    0.043 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/dataelem.py:439(DataElement_from_raw)
       51    0.000    0.000    2.177    0.043 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/values.py:320(convert_value)
       44    0.000    0.000    2.176    0.049 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/values.py:255(convert_SQ)
       44    0.035    0.001    2.176    0.049 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/filereader.py:427(read_sequence)
   152/66    0.000    0.000    2.171    0.033 {built-in method builtins.hasattr}
    16920    0.147    0.000    1.993    0.000 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/filereader.py:452(read_sequence_item)
    16923    0.116    0.000    1.267    0.000 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/filereader.py:365(read_dataset)
    84616    0.113    0.000    0.699    0.000 /home/cf/python/venv/lib/python3.5/site-packages/pydicom/dataset.py:960(__setattr__)

最新更新