% Journal article record, citation key fdi:010082762 (Remote Sensing, vol. 13, issue 16, 2021).
% Cleaned from a catalogue export: per-letter brace protection removed (only the
% proper noun in the title is protected), empty editor/booktitle fields dropped,
% the non-standard "numero" field renamed to "number", and the catalogue-style
% pages value "3101 [21 ]" split into the article number (pages) and the page
% count (numpages, an informational field that standard styles ignore).
% In the abstract, percent signs are escaped and the flattened superscripts
% (R-2, ha(-1)) are restored as LaTeX math.
% NOTE(review): the keyword "remote sensing drought indices" was split in two in
% the export ("remote sensing drought ; indices"); it is rejoined here, confirm
% against the publisher's keyword list.
@article{fdi:010082762,
  author   = {Bouras, E. and Jarlan, Lionel and Er-Raki, S. and Balaghi, R. and Amazirh, A. and Richard, B. and Khabba, S.},
  title    = {Cereal yield forecasting with satellite drought-based indices, weather data and regional climate indices using machine learning in {Morocco}},
  journal  = {Remote Sensing},
  volume   = {13},
  number   = {16},
  pages    = {3101},
  numpages = {21},
  year     = {2021},
  doi      = {10.3390/rs13163101},
  url      = {https://www.documentation.ird.fr/hor/fdi:010082762},
  language = {ENG},
  keywords = {crop yield forecasting ; machine learning ; remote sensing drought indices ; climate indices ; weather data ; semiarid region ; MAROC ; ZONE SEMIARIDE},
  abstract = {Accurate seasonal forecasting of cereal yields is an important decision support tool for countries, such as Morocco, that are not self-sufficient in order to predict, as early as possible, importation needs. This study aims to develop an early forecasting model of cereal yields (soft wheat, barley and durum wheat) at the scale of the agricultural province considering the 15 most productive over 2000-2017 (i.e., 15 $\times$ 18 = 270 yields values). To this objective, we built on previous works that showed a tight linkage between cereal yields and various datasets including weather data (rainfall and air temperature), regional climate indices (North Atlantic Oscillation in particular), and drought indices derived from satellite observations in different wavelengths. The combination of the latter three data sets is assessed to predict cereal yields using linear (Multiple Linear Regression, MLR) and non-linear (Support Vector Machine, SVM; Random Forest, RF, and eXtreme Gradient Boost, XGBoost) machine learning algorithms. The calibration of the algorithmic parameters of the different approaches are carried out using a 5-fold cross validation technique and a leave-one-out method is implemented for model validation. The statistical metrics of the models are first analyzed as a function of the input datasets that are used, and as a function of the lead times, from 4 months to 2 months before harvest. The results show that combining data from multiple sources outperformed models based on one dataset only. In addition, the satellite drought indices are a major source of information for cereal prediction when the forecasting is carried out close to harvest (2 months before), while weather data and, to a lesser extent, climate indices, are key variables for earlier predictions. The best models can accurately predict yield in January (4 months before harvest) with an $R^2$ = 0.88 and RMSE around 0.22 t ha$^{-1}$. The XGBoost method exhibited the best metrics. Finally, training a specific model separately for each group of provinces, instead of one global model, improved the prediction performance by reducing the RMSE by 10\% to 35\% depending on the provinces. In conclusion, the results of this study pointed out that combining remote sensing drought indices with climate and weather variables using a machine learning technique is a promising approach for cereal yield forecasting.},
}