% Journal article in Geoderma, vol. 375 (2020), article number 114503.
% Record identifier fdi:010078833; catalogue page given in the url field.
% Only terms that must keep their capitals under sentence-casing styles are
% brace-protected in the title; the abstract is reproduced as exported.
@article{fdi:010078833,
  author    = {Lagacherie, P. and Arrouays, D. and Bourennane, H. and Gomez, C{\'e}cile and Nkuba-Kasanda, L.},
  title     = {Analysing the impact of soil spatial sampling on the performances of {Digital} {Soil} {Mapping} models and their evaluation: a numerical experiment on {Quantile} {Random} {Forest} using clay contents obtained from {Vis-NIR-SWIR} hyperspectral imagery},
  journal   = {Geoderma},
  volume    = {375},
  pages     = {114503},
  pagetotal = {12},
  year      = {2020},
  issn      = {0016-7061},
  doi       = {10.1016/j.geoderma.2020.114503},
  url       = {https://www.documentation.ird.fr/hor/fdi:010078833},
  language  = {ENG},
  keywords  = {TUNISIE ; CAP BON},
  abstract  = {It has long been acknowledged that the soil spatial samplings used as inputs to DSM models are strong drivers - and often limiting factors - of the performances of such models. However, few studies have focused on evaluating this impact and identifying the related spatial sampling characteristics. In this study, a numerical experiment was conducted on this topic using the pseudo values of topsoil clay content obtained from an airborne Visible Near InfraRed-Short Wave InfraRed (Vis-NIR-SWIR) hyperspectral image in the Cap Bon region (Tunisia) as the source of the spatial sampling. Twelve thousand DSM models were built by running a Random Forest algorithm from soil spatial sampling of different sizes and average spacings (from 200 m to 2000 m) and different spatial distributions (from clustered to regularly distributed), aiming to mimic the various situations encountered when handling legacy data. These DSM models were evaluated with regard to both their prediction performances and their ability to estimate their overall and local uncertainties. Three evaluation methods were applied: a model-based one, a classical model-free one using 25\% of the sites removed from the initial soil data, and a reference one using a set of 100,000 independent sites selected by stratified random sampling over the entire region. The results showed that: 1) While, as expected, the performances of the DSM models increased when the spacing of the sample increased, this increase was diminished for the smallest spacing as soon as 50\% of the spatially structured variance was captured by the sampling, 2) Sampling that provided complete and even distributions in the geographical space and had as great spread of the target soil property as possible increased the DSM performances, while complete and even sampling distributions in the covariate space had less impacts, 3) Systematic underestimations of the overall uncertainty of DSM models were observed, that were all the more important that the sparse samplings poorly covered the real distribution of the target soil property and that the dense sampling were unevenly distributed in the geographical space, 4) The local uncertainties were underestimated for sparse sampling and over-estimated for dense sampling while being sensitive to the same sampling characteristics as overall uncertainty. Such finding have practical outcomes on sampling strategies and DSM model evaluation that are discussed.},
}