% Journal article record; the citation key is the identifier used in the url field.
% NOTE(review): number = {SI} is carried over verbatim from the exported "numero"
% field -- presumably "special issue"; confirm against the publisher's record.
@article{fdi:010061389,
  author   = {Dernoncourt, D. and Hanczar, B. and Zucker, Jean-Daniel},
  title    = {Analysis of feature selection stability on high dimension and small sample data},
  journal  = {Computational Statistics and Data Analysis},
  volume   = {71},
  number   = {SI},
  pages    = {681--693},
  year     = {2014},
  issn     = {0167-9473},
  doi      = {10.1016/j.csda.2013.07.012},
  url      = {https://www.documentation.ird.fr/hor/fdi:010061389},
  language = {ENG},
  keywords = {Feature selection ; Small sample ; Stability ; Low N/D ratio},
  abstract = {Feature selection is an important step when building a classifier on high dimensional data. As the number of observations is small, the feature selection tends to be unstable. It is common that two feature subsets, obtained from different datasets but dealing with the same classification problem, do not overlap significantly. Although it is a crucial problem, few works have been done on the selection stability. The behavior of feature selection is analyzed in various conditions, not exclusively but with a focus on t-score based feature selection approaches and small sample data. The analysis is in three steps: the first one is theoretical using a simple mathematical model; the second one is empirical and based on artificial data; and the last one is based on real data. These three analyses lead to the same results and give a better understanding of the feature selection problem in high dimension data.},
}