医疗影像文件读取方法

Medical Image File Formats

Medical Image File Formats 2014 Apr

DCM

Relevant Software

SimpleITK读取并显示dcm文件

Presentation

读取单张dcm文件

import SimpleITK as sitk
import numpy as np
import matplotlib.pyplot as plt
image = sitk.ReadImage(r"./729427_20181010_CT_5_249_011.dcm") # type(image) <class 'SimpleITK.SimpleITK.Image'>
image_array = np.squeeze(sitk.GetArrayFromImage(image))  # type(image_array) ->> <class 'numpy.ndarray'>  image_array.shape ->> (512, 512)
plt.imshow(image_array)
plt.show()

读取dcm文件夹

import SimpleITK as sitk

def read_dicom(PathDicom):
    # new ImageSeriesReader object
    series_reader = sitk.ImageSeriesReader()
    # GetGDCMSeriesFileNames
    dicom_names = series_reader.GetGDCMSeriesFileNames(PathDicom)
    # 通过之前获取到的序列的切片路径来读取该序列
    series_reader.SetFileNames(dicom_names)
    # 获取该序列对应的3D图像
    3D_image = series_reader.Execute()
    # sitk.GetArrayFromImage
    image_array = sitk.GetArrayFromImage(3D_image)
    return image_array, 3D_image

DICOM 数据处理 SimpleITK 需要注意的是,SimpleITK读取的图像数据的坐标顺序为zyx,即从多少张切片到单张切片的宽和高;而据SimpleITK Image获取的origin和spacing的坐标顺序则是xyz。这些需要特别注意。

PyQt5_DcmViewer_3d

pydicom库

Getting Started with pydicom

读取患者信息

import pydicom
from pydicom.data import get_testdata_files

# filename = "./dcom_sample/729427_20181010_CT_5_249_001.dcm"
filename = get_testdata_files("MR_small.dcm")[0]
ds = pydicom.dcmread(filename)  # plan dataset

详情见 👇

>>> import pydicom
>>> from pydicom.data import get_testdata_files
>>> filename = "100151270_20180808_CT_1_0000.dcm"
>>> ds = pydicom.dcmread(filename)
>>> ds
(0008, 0008) Image Type                          CS: ['DERIVED', 'PRIMARY', 'AXIAL']
(0008, 0016) SOP Class UID                       UI: CT Image Storage
(0008, 0018) SOP Instance UID                    UI: 1.2.840.113770.2.1.1374404734.2110896889.550030876
(0008, 0020) Study Date                          DA: '20180808'
...
(0010, 0010) Patient's Name                      PN: 'SUN^WEI^(??¨¬?^)'
(0010, 0020) Patient ID                          LO: '100151270'
(0010, 0030) Patient's Birth Date                DA: '19670329'
(0010, 0040) Patient's Sex                       CS: 'M'
...
(0020, 0011) Series Number                       IS: "1"
(0020, 0012) Acquisition Number                  IS: "0"
(0020, 0013) Instance Number                     IS: "0"
(0020, 0032) Image Position (Patient)            DS: ['-162.1999969', '-165.0000000', '-281.5000000']
...
(0028, 0010) Rows                                US: 512
(0028, 0011) Columns                             US: 512
(0028, 0030) Pixel Spacing                       DS: ['0.64453101', '0.64453101']
...
>>> ds['00100020']    # 使用TagID读取文件(患者)信息
(0010, 0020) Patient ID                          LO: '100151270'
>>> ds['00100020'].value
'100151270'
>>>

修改dcm_InstanceNumber

import pydicom

def resetInstanceNumber(file_dir):
    for root, dirs, files in os.walk(file_dir):
        for file in sorted(files):
            file_root_path = os.path.join(root, file)
            ds = pydicom.dcmread(file_root_path)        
            ds.InstanceNumber = len(files) - ds.InstanceNumber - 1
            ds.save_as(file_root_path)
  • 常用信息:姓名(ds.PatientName)、病人ID(ds.PatientID)、出生日期(ds.PatientBirthDate)、性别(ds.PatientSex)、治疗日期(StudyDate)、数据模态(Modality)、序列数(SeriesNumber)、机构名称(ds.InstitutionName)、(设备)生产厂商(ds.Manufacturer)、切片厚度(ds.SliceThickness)、像素间距(ds.PixelSpacing)、切片序号(InstanceNumber) 可阅读 DICOM之常用Tag

设置窗口窗位

参考:对于CT图像设置窗宽窗位

def setDicomWinWidthWinCenter(img_data, winwidth, wincenter, rows, cols):
    img_temp = img_data
    img_temp.flags.writeable = True
    min = (2 * wincenter - winwidth) / 2.0 + 0.5
    max = (2 * wincenter + winwidth) / 2.0 + 0.5
    dFactor = 255.0 / (max - min)

    for i in numpy.arange(rows):
        for j in numpy.arange(cols):
            img_temp[i, j] = int((img_temp[i, j]-min)*dFactor)

    min_index = img_temp < 0
    img_temp[min_index] = 0
    max_index = img_temp > 255
    img_temp[max_index] = 255

    return img_temp

INTERESTING CODE 👇


Others

  • 文件命名不规范,以PatientID_StudyDate_Modality_SeriesNumber_InstanceNumber为例进行重命名
# 依据InstanceNumber重命名文件
for root, dirs, files in os.walk(file_dir):
    for file in files:
        file_root_path = os.path.join(root, file)
        ds = pydicom.dcmread(file_root_path) 
        new_name = str(ds.PatientID) + "_" + str(ds.StudyDate) + "_" + str(ds.Modality) + "_"  + str(ds.SeriesNumber) + "_" + str(ds.InstanceNumber).zfill(4) + ".dcm"
        os.rename(file_root_path,os.path.join(root,new_name))

NRRD

Relevant Software

  • Slicer4
    • 注意事项:无法打开路径中含中文的.nrrd文件

pynrrd库

pynrrd_description 👉 Example usage 注:when write.data.shape(512,512,115) 耗时近310s

import numpy as np
import nrrd

# Some sample numpy data
data = np.zeros((5,4,3,2))
filename = 'testdata.nrrd'

# Write to a NRRD file
nrrd.write(filename, data)

# Read the data back from file
readdata, header = nrrd.read(filename)
print(readdata.shape)
print(header)

医疗影像数据预处理-nrrd

visualization 👇

from PIL import Image
import numpy as np
import nrrd

nrrd_filename = './testdata_even.nrrd'
nrrd_data, nrrd_options = nrrd.read(nrrd_filename)

# visualization
nrrd_image = Image.fromarray(nrrd_data[:,:,29]*1.5)  #nrrd_data[:,:,29] 表示截取第30张切片
nrrd_image.show() # 显示这图片

# save nrrd_image to "image.png"
nrrd_image = nrrd_image.convert("RGB")
nrrd_array = np.asarray(nrrd_image)
nrrd_image.save("./nrrd_image.png", "PNG")

发现visualization的图片呈左旋转显示效果,现使其正常显示

nrrd_data_29 = nrrd_data[:,:,29]*1.5
nrrd_data_29 = np.transpose(nrrd_data_29,(1,0))

nrrd_image = Image.fromarray(nrrd_data_29)  #nrrd_data[:,:,29] 表示截取第30张切片
nrrd_image.show() # 显示这图片

write

  • 方式一:nrrd.write('save_filename.nrrd', write_array)
  • 方式二:sitk.WriteImage(new_image, save_filename)

nifti数据(nii)

nibabel库

import nibabel as nib
import numpy as np
import matplotlib.pyplot as plt

# show the nii_data.shape
nii_file = "ADNI_011_S_0010_MR_MPR__GradWarp__B1_Correction__N3__Scaled_Br_20061208114538147_S8800_I32270.nii"
data = nib.load(nii_file)   # data.shape (192, 192, 160)  # data.affine.shape (4, 4)
img = data.get_fdata()  
img = np.squeeze(img)   # img.shape (192, 192, 160)
# print(data.header) #数据头信息 

# 获取slice信息生成图像
def show_img(slices):
    fig, axes = plt.subplots(1, len(slices))
    for i, slice in enumerate(slices):
        axes[i].imshow(slice.T, cmap="gray", origin="lower")

#读取nifti文件中的slice数据
data = nib.load(nii_file)
img = data.get_fdata()

#获取单张slice数据
slice_0 = img[26, :, :]
slice_1 = img[:, 30, :]
slice_2 = img[:, :, 16]

#生成图表
show_img([slice_0, slice_1, slice_2])
plt.suptitle("show slice image")
plt.show()
  • nib.load()读取文件,会将图像向左旋转90° (一般不推荐使用,使用sitk.ReadImage()即可 # z,y,x

data.header 👇

<class 'nibabel.nifti1.Nifti1Header'> object, endian='>'
sizeof_hdr      : 348
data_type       : b''
db_name         : b'011_S_0010'
...
quatern_b       : 0.70710677
quatern_c       : -1.0713779e-09
quatern_d       : -0.70710677
qoffset_x       : 94.87749
qoffset_y       : 165.8339
qoffset_z       : 115.27711
...

Convert Format

dcm2nrrd

file_path = 'xxx' # Dicom序列所在的文件夹路径

# assign series_file_names
series_file_names = sitk.ImageSeriesReader.GetGDCMSeriesFileNames(file_path)
series_file_names_list = list(series_file_names)
series_file_names_list.sort()

# assign file_names
file_names = tuple(series_file_names_list)

# new ImageSeriesReader object
series_reader = sitk.ImageSeriesReader()
        
# 通过之前获取到的序列的切片路径来读取该序列
series_reader.SetFileNames(file_names)  

# 获取该序列对应的3D图像
image3D = series_reader.Execute()

# 查看该3D图像的尺寸
print("image3D.GetSize()",image3D.GetSize())

# 将序列保存为单个的DCM或者NRRD文件
# sitk.WriteImage(image3D, 'img3D.dcm')
save_filename = os.path.join(save_filepath, 'xxx.nrrd')
sitk.WriteImage(image3D, save_filename)

nrrd2nrrd

import SimpleITK as sitk

filename = '7 Vein  1.5  B30f.nrrd'
save_filename = 'tt.nrrd'

image = sitk.ReadImage(filename)
image_array = sitk.GetArrayFromImage(image)  # z,y,x

new_image = sitk.GetImageFromArray(image_array)

# Set new_image_Direction_Info
new_Direction = list(image.GetDirection())
new_Direction[-1] *= 2
new_image.SetDirection(tuple(new_Direction))

# Set new_image Other Space_Info
new_image.SetOrigin(image.GetOrigin())
new_image.SetSpacing(image.GetSpacing())

sitk.WriteImage(new_image, save_filename)

dcm2nii

import SimpleITK as sitk
import numpy as np


def load_dcm_array(dicom):

    if dicom is None:
        raise Exception('dicom is %s' % str(dicom))
    dcm_array = sitk.GetArrayFromImage(dicom)
    if '0028|1050' in dicom.GetMetaDataKeys():
        wnd_center = float(dicom.GetMetaData('0028|1050'))  # 窗宽
        wnd_width = float(dicom.GetMetaData('0028|1051'))  # 窗位
    else:
        wnd_center = 32768
        wnd_width = 65535
    gH = wnd_center + wnd_width / 2
    gL = wnd_center - wnd_width / 2

    # HU值 # "归一"至0-255
    dcm_array = (254 * (dcm_array - gL) / wnd_width) * (gH >= dcm_array) * (gL <= dcm_array) + 255 * (gH < dcm_array)
    dcm_array = np.squeeze(dcm_array)  # (1, H, W) → (H, W)

    return dcm_array.astype(int)


def read_dicom(dicom_dir):

    # 1. 使用正确的顺序读取dcm文件
    series_reader = sitk.ImageSeriesReader()
    dicom_names = series_reader.GetGDCMSeriesFileNames(dicom_dir)
    dcm_img_list = [sitk.ReadImage(f) for f in dicom_names]
    # 2. 使用窗宽、窗位,计算HU,消除灰色背景
    image_array = np.array([load_dcm_array(dcm_img) for dcm_img in dcm_img_list], dtype='float')

    return image_array
    

if __name__ == '__main__':

    dcm_dir = '605/P00057229/CT'
    array = read_dicom(dcm_dir)
    new_image = sitk.GetImageFromArray(array)
    sitk.WriteImage(new_image, 'CT_HU.nii')

Summary

读取

dcm

注意:文件序列名称是否与InstanceNumber一致,且检查是否为有效排序(是否需要文件名补0)

方式一:(不推荐)

import pydicom

ds = pydicom.dcmread('xxxx.dcm') 

方式二:

import SimpleITK as sitk

image = sitk.ReadImage(file_path)
image_array = sitk.GetArrayFromImage(image)  # z,y,x  # Ex.(1,512,512)

方式三:(批量读取)

import SimpleITK as sitk

def read_dicom(PathDicom):
    # new ImageSeriesReader object
    series_reader = sitk.ImageSeriesReader()
    # GetGDCMSeriesFileNames
    dicom_names = series_reader.GetGDCMSeriesFileNames(PathDicom)
    # 通过之前获取到的序列的切片路径来读取该序列
    series_reader.SetFileNames(dicom_names)
    # 获取该序列对应的3D图像
    image_3D = series_reader.Execute()
    # sitk.GetArrayFromImage
    image_array = sitk.GetArrayFromImage(image_3D)
    return image_array, image_3D

nrrd

方式一:

import nrrd

ori_nrrd, nrrd_header = nrrd.read('xxxx.nrrd')

方式二:

import SimpleITK as sitk

image = sitk.ReadImage(file_path)   
image_array = sitk.GetArrayFromImage(image)  # z,y,x  # Ex. (230,512,512) # (N, H, W)

写入

dcm

import pydicom

ds = pydicom.dcmread(file_root_path) 
ds.save_as(file_root_path)

nrrd

方式一: (慢)

import nrrd 

nrrd.write(filename, data, header)

方式二:

import SimpleITK as sitk

sitk.WriteImage(new_image, save_filename)

可视化

方式一:

from PIL import Image

nrrd_image = Image.fromarray(image_array)  # image_array is 2D
nrrd_image.show() 

方式二:

import matplotlib.pyplot as plt

plt.imshow(image_array)   # image_array is 2D
plt.show()