From b3de781a565ad4195d133cbf3b581968769a12f0 Mon Sep 17 00:00:00 2001 From: Happy-zyy <617532750@qq.com> Date: Mon, 15 Oct 2018 16:55:01 +0800 Subject: [PATCH 1/2] Modify the forward selection process of greedy algorithm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 对备选特征做前向选择,即依次挑选特征加入训练集,如果得分高于当前最高分,则迭代最高分和特征集合,然后在迭代后的特征集合上继续执行前向选择。 --- MLFeatureSelection/sequence_selection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MLFeatureSelection/sequence_selection.py b/MLFeatureSelection/sequence_selection.py index 0905168..872344a 100644 --- a/MLFeatureSelection/sequence_selection.py +++ b/MLFeatureSelection/sequence_selection.py @@ -212,7 +212,7 @@ def _Greedy(self): for sub, i in enumerate(col): #forward sequence selection add one each round print(i) print('{}/{}'.format(sub,len(col))) - selectcol = self._Startcol[:] + selectcol = self._TemplUsedFeatures[:] selectcol.append(i) self._validation(selectcol, str(1+sub), i, coetest = 0) for sr, i in enumerate(self._TemplUsedFeatures[:-1]): # backward sequence selection, -2 becuase the last 2 is just selected From ca61760d458c9a97a42f42dd860f5a8c6cb2ff76 Mon Sep 17 00:00:00 2001 From: Happy-zyy <617532750@qq.com> Date: Mon, 15 Oct 2018 19:22:32 +0800 Subject: [PATCH 2/2] Random selection process to remove duplicates MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 随机选择过程对可能出现的特征组合进行去重 --- MLFeatureSelection/sequence_selection.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/MLFeatureSelection/sequence_selection.py b/MLFeatureSelection/sequence_selection.py index 872344a..0750f80 100644 --- a/MLFeatureSelection/sequence_selection.py +++ b/MLFeatureSelection/sequence_selection.py @@ -238,12 +238,15 @@ def _MyRandom(self,rl=[range(3,9),50]): for t in rl[0]: if t < len(col): print('add {} features'.format(t)) + have_selected = [] for i in range(rl[1]): selectcol = random.sample(col, t) - recordadd = selectcol[:] - for add in self._bestfeature: - selectcol.append(add) - self._validation(selectcol, str(i), str(recordadd)) + if sorted(selectcol) not in have_selected: #去重 提速 + have_selected.append(sorted(selectcol)) + recordadd = selectcol[:] + for add in self._bestfeature: + selectcol.append(add) + self._validation(selectcol, str(i), str(recordadd)) print('{0}{1}{2}'.format('-' * 20, 'complete random', '-' * 20)) @_reachlimit