% Journal article: Chen et al., Geoderma, volume 433 (2023).
% Notes for maintainers:
%   - pages holds the article number (the journal uses article numbers rather
%     than page ranges); the page count is stored separately in pagetotal.
%   - In the abstract, the percent signs, the less-or-equal sign and R-squared
%     are written in LaTeX form so the field can be typeset without errors.
%   - Keywords are separated by " ; " (presumably the exporting catalogue's
%     convention; confirm before changing the separator).
@article{fdi:010087709,
  author    = {Chen, S. C. and Saby, N. P. A. and Martin, M. P. and Barth{\`e}s, Bernard and Gomez, C{\'e}cile and Shi, Z. and Arrouays, D.},
  title     = {Integrating additional spectroscopically inferred soil data improves the accuracy of digital soil mapping},
  journal   = {Geoderma},
  volume    = {433},
  pages     = {116467},
  pagetotal = {11},
  year      = {2023},
  language  = {english},
  issn      = {0016-7061},
  doi       = {10.1016/j.geoderma.2023.116467},
  url       = {https://www.documentation.ird.fr/hor/fdi:010087709},
  keywords  = {Proximal soil sensing ; Vis-NIR spectroscopy ; MIR spectroscopy ; Digital soil mapping ; Measurement error ; FRANCE},
  abstract  = {Digital soil mapping has been increasingly advocated as an efficient approach to
               deliver fine-resolution and up-to-date soil information in evaluating soil
               ecosystem services. Considering the great spatial heterogeneity of soils, it is
               widely recognized that more representative soil observations are needed for
               better capturing the soil spatial variation and thus to increase the accuracy of
               digital soil maps. In reality, the budget for the field work and soil laboratory
               analysis is commonly limited due to its high cost and low efficiency. In the last
               two decades, being an alternative to wet chemistry, soil spectroscopy, such as
               visible-near infrared (Vis-NIR), mid-infrared (MIR) spectroscopy has been
               developed in measuring soil information in a rapid and cost-effective manner and
               thus enable to collect more soil information for digital soil mapping (DSM).
               However, spectroscopically inferred (SI) data are subject to higher uncertainties
               than reference laboratory analysis. Many DSM practices integrated SI data with
               soil observations into spatial modelling while few studies addressed the key
               question that whether these non-errorless soil data improve map accuracy in DSM.
               In this study, French Soil Monitoring Network (RMQS) and Land Use and Coverage
               Area frame Survey Soil (LUCAS Soil) datasets were used to evaluate the potential
               of SI data from Vis-NIR and MIR in digital mapping of soil properties (i.e. soil
               organic carbon, clay, and pH) at a national scale. Cubist and quantile regression
               forests were used for spectral predictive modelling and DSM modelling,
               respectively. For both RMQS and LUCAS Soil dataset, different scenarios regarding
               varying proportions of SI data and laboratory observations were tested for
               spectral predictive models and DSM models. Repeated (50 times) external
               validation suggested that adding additional SI data can improve the performance
               of DSM models regardless of soil properties (gain of $R^2$ proportion at
               3--19\%) when the laboratory observations are limited ($\leq$ 50\%). Lower
               proportion of SI data used in DSM model and higher accuracy of spectral
               predictive models led to greater improvement of DSM. Our results also showed that
               a greater proportion of SI data lowered the prediction intervals which may result
               in an underestimation of prediction uncertainty. The determination of accuracy
               threshold on SI data for the use in DSM needs to be explored in future studies.},
}