@article{fdi:010072026,
  title = {{A}n approach to optimizing abstaining area for small sample data classification},
  author = {{H}anczar, {B}. and {Z}ucker, {J}ean-{D}aniel},
  language = {{ENG}},
  abstract = {{G}iven a classification task, one approach to improving accuracy relies on the use of abstaining classifiers. {T}hese classifiers are trained to reject observations for which the predicted values are not reliable enough: the rejected observations belong to an abstaining area in the feature space. {T}wo equivalent methods exist to theoretically compute the optimal abstaining area for a given classification problem. {T}he first is based on the posterior probability computed by the model, and the other is based on the derivative of the {ROC} function of the model. {A}lthough the second method has been shown to give the best results, in small-sample settings such as those found in omics data, the estimates of the posterior probabilities and of the derivative of the {ROC} curve both lack precision, leading to far-from-optimal abstaining areas. {A}s a consequence, neither method brings the expected improvement in accuracy. {W}e propose five alternative algorithms for computing the abstaining area that are adapted to small-sample problems. {T}he idea behind these algorithms is to compute an accurate and robust estimation of the {ROC} curve and its derivatives. {T}hese estimations are mainly based on the assumption that the distribution of the classifier's output for each class is normal or a mixture of normal distributions. {T}hese distributions are estimated by a kernel density estimator or a {B}ayesian semiparametric estimator. {A}nother method works on the approximation of the convex hull of the {ROC} curve. {O}nce the derivatives of the {ROC} curve are estimated, the optimal abstaining area can be computed directly. {T}he performance of our algorithms is directly related to their capacity to compute an accurate estimation of the {ROC} curve. {A} sensitivity analysis of our methods with respect to dataset size and rejection cost has been conducted through a set of experiments. {W}e show that our methods improve the performance of abstaining classifiers on several real datasets and for different learning algorithms.},
  keywords = {{S}upervised learning ; {R}eject option ; {S}mall-sample setting ; {A}bstaining classifier ; {ROC} curve estimation},
  journal = {{E}xpert {S}ystems with {A}pplications},
  volume = {95},
  pages = {153--161},
  ISSN = {0957-4174},
  year = {2018},
  DOI = {10.1016/j.eswa.2017.11.013},
  URL = {https://www.documentation.ird.fr/hor/fdi:010072026},
}
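
The abstract describes two ingredients of the approach: a kernel density estimate of the classifier's score distribution for each class, and a rejection region derived from the resulting reliability of the prediction. The Python sketch below (not the authors' code) illustrates the posterior-based construction mentioned in the abstract, combining Gaussian kernel density estimation with Chow-style rejection; the function name abstaining_region, the symmetric misclassification costs, the default prior, and the grid resolution are illustrative assumptions.

import numpy as np
from scipy.stats import gaussian_kde

def abstaining_region(scores_class0, scores_class1, rejection_cost, prior1=0.5):
    """Sketch: estimate the per-class score densities with a Gaussian KDE,
    derive the posterior of class 1 via Bayes' rule, and return the score
    values where the maximum posterior falls below 1 - rejection_cost
    (Chow's rule), i.e. where the classifier should abstain."""
    kde0 = gaussian_kde(scores_class0)  # density of scores under class 0
    kde1 = gaussian_kde(scores_class1)  # density of scores under class 1
    lo = min(scores_class0.min(), scores_class1.min())
    hi = max(scores_class0.max(), scores_class1.max())
    grid = np.linspace(lo, hi, 1000)    # evaluation grid over the score range
    p0, p1 = 1.0 - prior1, prior1
    f0, f1 = kde0(grid), kde1(grid)
    post1 = p1 * f1 / (p1 * f1 + p0 * f0)      # posterior P(class 1 | score)
    max_post = np.maximum(post1, 1.0 - post1)  # confidence of the decision
    reject = max_post < 1.0 - rejection_cost   # abstain when confidence is low
    return grid[reject]                         # scores inside the abstaining area

# Example with synthetic, overlapping score distributions:
rng = np.random.default_rng(0)
s0 = rng.normal(0.3, 0.15, size=50)  # small-sample scores, class 0
s1 = rng.normal(0.7, 0.15, size=50)  # small-sample scores, class 1
area = abstaining_region(s0, s1, rejection_cost=0.2)
print(area.min(), area.max())        # boundaries of the estimated abstaining interval

The paper's preferred route instead estimates the ROC curve and its derivatives from these smoothed densities (the ROC slope at a threshold is the likelihood ratio f1/f0), which under the normal or mixture-of-normals assumption is more robust in small samples than the raw posterior above.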