# 影像组学学习笔记(5)-特征筛选之方差选择法

Formula.jpg
###### 方差选择法进行降维的代码实现：
```python
import pandas as pd
import numpy as np
from sklearn.utils import shuffle
``````
```python
xlsx1_filePath = 'C:/Users/RONG/Desktop/PythonBasic/data_A.xlsx'
xlsx2_filePath = 'C:/Users/RONG/Desktop/PythonBasic/data_B.xlsx'
# Load the two feature tables. Rows from data_A are labelled 0 and rows from
# data_B are labelled 1 below, so each spreadsheet is treated as one class.
data_1 = pd.read_excel(xlsx1_filePath)
data_2 = pd.read_excel(xlsx2_filePath)
rows_1, __ = data_1.shape
rows_2, __ = data_2.shape
# Insert the class label as column 0 of each table.
data_1.insert(0, 'label', [0] * rows_1)
data_2.insert(0, 'label', [1] * rows_2)
# Stack both classes into one table, then randomise the row order.
data = pd.concat([data_1, data_2])
data = shuffle(data)
# Replace missing values with 0.
data = data.fillna(0)
``````
```python
X = data[data.columns[0:]]
``````

```python
# VarianceSelection
from sklearn.feature_selection import VarianceThreshold

# Keep only the columns of X whose variance is above the threshold; adjust the
# threshold value to control how many features survive the filter.
selector = VarianceThreshold(threshold=1e10)
selector.fit_transform(X)

# Integer positions of the retained columns, reused for both report lines.
kept_idx = selector.get_support(indices=True)
# print('EveryVaris:'+str(selector.variances_))  # per-column variances
print(f'selectedFeatureIndex:{kept_idx!s}')
print(f'selectedFeatureNameis:{X.columns[kept_idx]!s}')
# print('excludedFeatureNameis:'+str(X.columns[~ selector.get_support()]))  # '~' inverts the boolean mask
``````

Output:

```text
# selectedFeatureIndex:[17 30 34 92]
# selectedFeatureNameis:Index(['original_firstorder_Energy', 'original_firstorder_TotalEnergy',
#        'original_glcm_ClusterProminence',
#        'original_glszm_LargeAreaHighGrayLevelEmphasis'],
#      dtype='object')
``````