% Journal article: Tresson et al., Geoderma 450 (2024), article no. 117056.
% The citation key is the record identifier used in the documentation.ird.fr URL below.
% Given names other than the first author's are kept as initials because the
% source record does not provide the full forms.
@article{fdi:010091945,
  author   = {Tresson, Paul and Dumont, M. and Jaeger, M. and Borne, F. and Boivin, S. and Marie-Louise, L. and Fran{\c{c}}ois, J. and Boukcim, H. and Goeau, H.},
  title    = {Self-supervised learning of {Vision} {Transformers} for digital soil mapping using visual data},
  journal  = {Geoderma},
  volume   = {450},
  pages    = {117056},
  numpages = {9},
  year     = {2024},
  issn     = {0016-7061},
  doi      = {10.1016/j.geoderma.2024.117056},
  url      = {https://www.documentation.ird.fr/hor/fdi:010091945},
  language = {english},
  keywords = {Self-supervised learning, Vision transformers, Digital soil mapping, Arid lands},
  abstract = {In arid environments, prospecting cultivable land is challenging due to harsh climatic conditions and vast, hard-to-access areas. However, the soil is often bare, with little vegetation cover, making it easy to observe from above. Hence, remote sensing can drastically reduce costs to explore these areas. For the past few years, deep learning has extended remote sensing analysis, first with Convolutional Neural Networks (CNNs), then with Vision Transformers (ViTs). The main drawback of deep learning methods is their reliance on large calibration datasets, as data collection is a cumbersome and costly task, particularly in drylands. However, recent studies demonstrate that ViTs can be trained in a self-supervised manner to take advantage of large amounts of unlabelled data to pre-train models. These backbone models can then be finetuned to learn a supervised regression model with few labelled data. In our study, we trained ViTs in a self-supervised way with a 9500 km$^2$ satellite image of dry-lands in Saudi Arabia with a spatial resolution of 1.5 m per pixel. The resulting models were used to extract features describing the bare soil and predict soil attributes (pH H$_2$O, pH KCl, Si composition). Using only RGB data, we can accurately predict these soil properties and achieve, for instance, an RMSE of 0.40 +/- 0.03 when predicting alkaline soil pH. We also assess the effectiveness of adding additional covariates, such as elevation. The pretrained models can as well be used as visual features extractors. These features can be used to automatically generate a clustered map of an area or as input of random forests models, providing a versatile way to generate maps with limited labelled data and input variables.},
}