% Journal article in Sensors, volume 23, issue 23, article number 9328.
% The `pages` field carries the article number; the 18-page extent is kept in `pagetotal`.
% Accents are written as LaTeX escapes so the record is safe for 8-bit BibTeX.
@article{fdi:010088757,
  author    = {Sirpa-Poma, J. W. and Satg{\'e}, Fr{\'e}d{\'e}ric and Resongles, El{\'e}onore and Pillco-Zol{\'a}, R. and Molina-Carpio, J. and Colque, M. G. F. and Ormachea, M. and Mollinedo, P. P. and Bonnet, Marie-Paule},
  title     = {Towards the improvement of soil salinity mapping in a data-scarce context using {Sentinel-2} images in machine-learning models},
  journal   = {Sensors},
  volume    = {23},
  number    = {23},
  pages     = {9328},
  pagetotal = {18},
  year      = {2023},
  language  = {ENG},
  keywords  = {Sentinel-2 ; machine-learning ; soil salinity ; data scarcity ; Random-Forest ; Support Vector Machine},
  abstract  = {Several recent studies have evidenced the relevance of machine-learning for soil salinity mapping using Sentinel-2 reflectance as input data and field soil salinity measurement (i.e., Electrical Conductivity-EC) as the target. As soil EC monitoring is costly and time consuming, most learning databases used for training/validation rely on a limited number of soil samples, which can affect the model consistency. Based on the low soil salinity variation at the Sentinel-2 pixel resolution, this study proposes to increase the learning database's number of observations by assigning the EC value obtained on the sampled pixel to the eight neighboring pixels. The method allowed extending the original learning database made up of 97 field EC measurements (OD) to an enhanced learning database made up of 691 observations (ED). Two classification machine-learning models (i.e., Random Forest-RF and Support Vector Machine-SVM) were trained with both OD and ED to assess the efficiency of the proposed method by comparing the models' outcomes with EC observations not used in the models' training. The use of ED led to a significant increase in both models' consistency with the overall accuracy of the RF (SVM) model increasing from 0.25 (0.26) when using the OD to 0.77 (0.55) when using ED. This corresponds to an improvement of approximately 208\% and 111\%, respectively. Besides the improved accuracy reached with the ED database, the results showed that the RF model provided better soil salinity estimations than the SVM model and that feature selection (i.e., Variance Inflation Factor-VIF and/or Genetic Algorithm-GA) increase both models' reliability, with GA being the most efficient. This study highlights the potential of machine-learning and Sentinel-2 image combination for soil salinity monitoring in a data-scarce context, and shows the importance of both model and features selection for an optimum machine-learning set-up.},
  doi       = {10.3390/s23239328},
  url       = {https://www.documentation.ird.fr/hor/fdi:010088757},
}