% Journal article, early-access record: the source record carried no volume,
% issue or page range, only an "Early access" marker and a 14-page count.
% Both are kept in `note` (and the count in `pagetotal` for biblatex) so that
% `volume` and `pages` can be filled in once the final issue data is known.
% The last five sentences of `abstract` are the article's highlight statements
% as they appeared in the source record.
@article{fdi:010090664,
  author    = {Dharumarajan, S. and Gomez, C{\'e}cile and Lalitha, M. and Vasundhara, R. and Hegde, R. and Patil, N. G.},
  title     = {Assessing the utility of {Munsell} soil color in building and evaluating spectral models for soil clay content prediction},
  journal   = {Soil Science Society of America Journal},
  year      = {2024},
  note      = {Early access, 14 p.},
  pagetotal = {14},
  issn      = {0361-5995},
  doi       = {10.1002/saj2.20692},
  url       = {https://www.documentation.ird.fr/hor/fdi:010090664},
  language  = {english},
  keywords  = {INDE},
  abstract  = {The present study examined how the use of soil color can help build and evaluate clay content prediction models from laboratory visible and near infrared spectroscopic data. This study was based on a regional database containing 449 soil samples collected over Karnataka state in India, which has been divided into red soils (240 samples) and black soils (209 samples) based on their Munsell soil color. Partial least squares regression models were calibrated and validated from both the regional datasets and subsets stratified as red and black soils. In addition, a random forest model was used to classify the validation soil samples into black and red classes to evaluate models' performance. First, while the clay content predicted by the regression model built from regional data was evaluated as correct at regional scale (R2val of 0.75), this model was evaluated as more accurate over black (R2val of 0.8) than red (R2val of 0.63) soil samples. Second, the regression models built from subsets stratified per soil color provided different performances than the regression model built from the regional data, both at the regional scale and soil color scale. In conclusion, this study demonstrated that (1) predictions are highly dependent on calibration data, (2) the interpretation of prediction performances relies heavily on validation data, and (3) pedological knowledge, such as soil color, can be effectively employed as an encouraging covariate in both the construction and evaluation of regression models. Prediction of clay from visible and near infrared spectroscopy was analyzed based on soil color.
    Models were calibrated from regional database and subsets stratified by soil color. Global model yielded over- and underestimation of accuracy over red and black soils, respectively. Predictions are highly dependent on the calibration dataset. Model performance evaluation is highly dependent on validation data stratification.},
}