# 通过遗传算法来优化特征工程

ai_cover.jpeg

#### 初始化种群

• chromosome（染色体）可以看成个体，每一个个体都是针对样本的一种特征选择方案
• population（种群）由一定数量的个体组成。接下来我们不断循环执行选择、交叉和变异这一过程来优化种群，也就是优化特征选择方案。
def initilization_of_population(size, n_feat, drop_ratio=0.3):
    """Create the initial population of random feature-selection masks.

    Each chromosome is a boolean NumPy array of length ``n_feat`` in which
    ``int(drop_ratio * n_feat)`` entries are ``False`` and the rest are
    ``True``, placed at random positions.

    Args:
        size: Number of chromosomes to generate.
        n_feat: Number of genes (one per feature) in each chromosome.
        drop_ratio: Fraction of genes initially set to ``False``.
            Defaults to 0.3.

    Returns:
        A list of ``size`` boolean arrays, each of shape ``(n_feat,)``.
    """
    n_dropped = int(drop_ratio * n_feat)
    population = []
    for _ in range(size):
        # The ``np.bool`` alias was removed in NumPy 1.24; the builtin
        # ``bool`` is the supported dtype spelling.
        chromosome = np.ones(n_feat, dtype=bool)
        chromosome[:n_dropped] = False
        # Shuffle in place so the False genes land on random positions.
        np.random.shuffle(chromosome)
        population.append(chromosome)
    return population

# Demo of how one chromosome is built: start with every gene switched on,
# switch off the first 30%, then shuffle so the off genes land anywhere.
temp = np.ones(10, dtype=bool)
n_off = int(0.3 * 10)
temp[:n_off] = False
print(temp)  # mask before shuffling
np.random.shuffle(temp)
print(temp)  # mask after shuffling
``````[False False False  True  True  True  True  True  True  True]
[ True  True  True  True False False  True  True False  True]
``````

#### 评估种群个体适应度

def fitness_score(population):
    """Score every chromosome and return scores and chromosomes, best first.

    For each chromosome, ``logmodel`` is fitted on the columns of ``X_train``
    selected by the boolean mask and evaluated with ``accuracy_score`` on the
    same columns of ``X_test``. ``logmodel``, ``X_train``, ``X_test``,
    ``y_train``, ``y_test`` and ``accuracy_score`` are not parameters; they
    are looked up in the enclosing module scope.

    Args:
        population: Sequence of equal-length boolean masks.

    Returns:
        A tuple ``(scores, chromosomes)`` of two lists, both ordered by
        descending score. An empty population yields ``([], [])``.
    """
    # An empty population would become a 1-D array below and break the
    # two-dimensional row indexing, so answer it directly.
    if len(population) == 0:
        return [], []

    scores = []
    for chromosome in population:
        logmodel.fit(X_train.iloc[:, chromosome], y_train)
        predictions = logmodel.predict(X_test.iloc[:, chromosome])
        scores.append(accuracy_score(y_test, predictions))

    scores, population = np.array(scores), np.array(population)
    # argsort is ascending; reverse the index once to get best-first order.
    order = np.argsort(scores)[::-1]
    return list(scores[order]), list(population[order, :])

#### 选择

def selection(pop_after_fit, n_parents):
    """Keep the first ``n_parents`` chromosomes of a ranked population.

    Args:
        pop_after_fit: Chromosomes ordered best first.
        n_parents: Number of chromosomes to keep. Values larger than the
            population size keep everything; values below zero keep nothing.

    Returns:
        A new list holding the selected chromosomes. The chromosome objects
        themselves are not copied.
    """
    # Slicing clamps to the available length instead of raising IndexError;
    # max() keeps a negative count from being read as "all but the last k".
    return list(pop_after_fit[:max(n_parents, 0)])

#### 交叉

def crossover(pop_after_sel, start=3, stop=7):
    """Produce one child per parent by swapping in a neighbour's gene segment.

    Child ``i`` is a copy of parent ``i`` whose genes ``start:stop`` are
    replaced by the same segment of parent ``i + 1`` (wrapping around to the
    first parent for the last one). The returned population holds the
    parents followed by the children, so it is twice the input size.

    Neither the input list nor its chromosomes are modified, and every
    returned chromosome is an independent object.

    Args:
        pop_after_sel: Selected parent chromosomes. Each must support
            ``.copy()`` and slice assignment, e.g. NumPy arrays.
        start: First gene index of the exchanged segment. Defaults to 3.
        stop: End (exclusive) of the exchanged segment. Defaults to 7.

    Returns:
        A new list: copies of the parents, then the children.
    """
    n_parents = len(pop_after_sel)
    # Copy the parents into a fresh list; aliasing the input list would make
    # it grow while it is being iterated and indexed.
    population_nextgen = [parent.copy() for parent in pop_after_sel]
    for i in range(n_parents):
        donor = pop_after_sel[(i + 1) % n_parents]
        # Copy before editing so the parent keeps its own genes.
        child = pop_after_sel[i].copy()
        child[start:stop] = donor[start:stop]
        population_nextgen.append(child)
    return population_nextgen

#### 变异

def mutation(pop_after_cross, mutation_rate):
    """Flip each gene of each chromosome with probability ``mutation_rate``.

    One ``random.random()`` draw is made per gene; the gene is negated when
    the draw is below ``mutation_rate``. The input chromosomes are left
    untouched: each one is copied before being mutated.

    Args:
        pop_after_cross: Chromosomes to mutate. Each must support
            ``.copy()`` and item assignment, e.g. NumPy boolean arrays.
        mutation_rate: Per-gene flip probability.

    Returns:
        A new list of mutated copies, in the same order as the input.
    """
    population_nextgen = []
    for original in pop_after_cross:
        # Mutating in place would also change every alias of this chromosome
        # held by the caller, so work on a copy.
        chromosome = original.copy()
        for j in range(len(chromosome)):
            if random.random() < mutation_rate:
                chromosome[j] = not chromosome[j]
        population_nextgen.append(chromosome)
    return population_nextgen

#### 迭代过程

def generations(size, n_feat, n_parents, mutation_rate, n_gen, X_train,
                X_test, y_train, y_test):
    """Run the genetic algorithm and record the best individual per generation.

    Every generation evaluates the population, keeps the ``n_parents`` best
    chromosomes, doubles them through crossover, and mutates the result to
    form the next population.

    Args:
        size: Number of chromosomes in the initial population.
        n_feat: Number of genes (features) per chromosome.
        n_parents: Number of top chromosomes kept by selection.
        mutation_rate: Per-gene flip probability passed to ``mutation``.
        n_gen: Number of generations to run.
        X_train, X_test, y_train, y_test: Accepted as part of the interface
            but not used by this function; ``fitness_score`` is called with
            the population only.

    Returns:
        A tuple ``(best_chromo, best_score)`` of two lists with one entry per
        generation: the top-ranked chromosome and its score.
    """
    best_chromo = []
    best_score = []
    # Seed the loop with a random population.
    population_nextgen = initilization_of_population(size, n_feat)
    for _ in range(n_gen):
        # Evaluate and rank the whole population (``size`` individuals).
        scores, pop_after_fit = fitness_score(population_nextgen)
        print(scores[:2])

        # Record the winner now, and as a copy: the steps below may edit
        # chromosomes in place, which would leave the stored mask out of sync
        # with the score recorded next to it.
        best_chromo.append(pop_after_fit[0].copy())
        best_score.append(scores[0])

        # Keep the n_parents best individuals.
        pop_after_sel = selection(pop_after_fit, n_parents)
        # Crossover returns parents plus one child per parent.
        pop_after_cross = crossover(pop_after_sel)
        # Mutation keeps the population size unchanged.
        population_nextgen = mutation(pop_after_cross, mutation_rate)
    return best_chromo, best_score

#### 运行代码

# Run the search: 200 individuals over 30 features, 100 parents kept per
# generation, a 10% per-gene mutation rate, and 38 generations.
chromo, score = generations(
    size=200,
    n_feat=30,
    n_parents=100,
    mutation_rate=0.10,
    n_gen=38,
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)

# Refit on the features chosen by the last recorded chromosome and report
# the accuracy on the test split.
final_mask = chromo[-1]
logmodel.fit(X_train.iloc[:, final_mask], y_train)
predictions = logmodel.predict(X_test.iloc[:, final_mask])
print("Accuracy score after genetic algorithm is= " + str(accuracy_score(y_test, predictions)))
