% Journal article: Remote Sensing, volume 14, issue 8 (2022).
% The pages field holds the article number; the 17-page extent given in the
% original record is kept in pagetotal (ignored by classic BibTeX styles).
@article{fdi:010084723,
  author    = {Bonsoms, J. and Boulet, Gilles},
  title     = {Ensemble machine learning outperforms empirical equations for the ground heat flux estimation with remote sensing data},
  journal   = {Remote Sensing},
  volume    = {14},
  number    = {8},
  pages     = {1788},
  pagetotal = {17},
  year      = {2022},
  doi       = {10.3390/rs14081788},
  url       = {https://www.documentation.ird.fr/hor/fdi:010084723},
  language  = {ENG},
  keywords  = {ground heat flux ; machine learning ; remote sensing ; surface energy balance ; FRANCE ; NIGER ; MAROC ; TUNISIE},
  abstract  = {Estimating evapotranspiration at the field scale is a major
    component of sustainable water management. Due to the difficulty to assess
    some major unknowns of the water cycle at that scale, including irrigation
    amounts, evapotranspiration is often computed as the residual of the
    instantaneous surface energy budget. One of the Surface Energy Balance
    components with the largest uncertainties in their quantification over
    bare soils and sparse vegetation areas is the ground heat flux (G). Over
    the last decades, the estimation of G with remote sensing (RS) data has
    been mainly achieved with empirical equations, on the basis of the G and
    net radiation (Rn) ratio, G/Rn. The G/Rn empirical equations generally
    require vegetation data (Type I empirical equations), in combination with
    surface temperature (Ts) and albedo (Type II empirical equations). In this
    article, we aim to evaluate the estimation of G with RS data. Here, we
    compared eight G/Rn empirical equations against two types of machine
    learning (ML) methods: an ensemble ML type, the Random Forest (RF), and
    the Neural Networks (NN). The comparison of each method was evaluated
    using a wide range of climate and land cover datasets, including data from
    Eddy-Covariance towers that extend along the mid-latitude areas that
    encompass the European and African continents. Our results have shown
    evidence that the driver of G in bare soils and sparse vegetation areas
    (Fraction of Vegetation, Fv $\leq$ 0.25) is Ts, instead of vegetation
    greenness indexes. On the other hand, the accuracy in the estimation of G
    with Rn, Ts or Fv decreases in densely vegetated areas (Fv $\geq$ 0.50).
    There are no significant differences between the most accurate Type I and
    II empirical equations. For bare soils and sparse vegetation areas the
    empirical equation which combines the Leaf Area Index (LAI) and Ts (E7)
    estimates G best. In densely vegetated areas, an exponential empirical
    equation based on Fv (E4), shows the best performance. However, ML better
    estimates G than the empirical equations, independently of the Fv ranges.
    An RF model with Rn, LAI and Ts as predictor variables shows the best
    accuracy and performance metrics, outperforming the NN model.},
}