% Podlejski et al. (2022), Frontiers in Marine Science 9, article 960939.
% The pages field holds the article number; pagetotal holds the page count.
@article{fdi:010086383,
  author    = {Podlejski, W. and Descloitres, J. and Chevalier, Crist{\`e}le and Minghelli, A. and Lett, Christophe and Berline, L.},
  title     = {Filtering out false {Sargassum} detections using context features},
  journal   = {Frontiers in Marine Science},
  volume    = {9},
  pages     = {960939},
  pagetotal = {15},
  year      = {2022},
  doi       = {10.3389/fmars.2022.960939},
  url       = {https://www.documentation.ird.fr/hor/fdi:010086383},
  language  = {ENG},
  keywords  = {Sargassum algae, remote sensing, random forest, contextual analysis, Tropical North Atlantic, fractional coverage, time series, ATLANTIQUE NORD},
  abstract  = {Since 2011, the distribution extent of pelagic Sargassum algae has substantially increased and now covers the whole Tropical North Atlantic Ocean, with significant inter-annual variability.
    The ocean colour imagery has been used as the only way to monitor regularly such a vast area.
    However, the detection is hampered by cloud masking, sunglint, coastal contamination and other phenomena.
    All together, they lead to false detections that can hardly be discriminated by classic radiometric analysis, but may be overcome by considering the shape and the context of the detections.
    Here, we built a machine learning model base exclusively on spatial features to filter out false detections after the detection process.
    Moderate-Resolution Imaging Spectroradiometer (MODIS, 1 km) data from Aqua and Terra satellites were used to generate daily map of Alternative Floating Algae Index (AFAI).
    Based on this radiometric index, Sargassum presence in the Tropical Atlantic North Ocean was inferred.
    For every Sargassum aggregations, five contextual indices were extracted (number of neighbours, surface of neighbours, temporal persistence, distance to the coast and aggregation texture) then used by a random forest binary classifier.
    Contextual features at large-scale were most important in the classifier.
    Trained with a multi-annual (2016--2020) learning set, the model performs the filtering of daily false detections with an accuracy of $\sim$90\%.
    This leads to a reduction of detected Sargassum pixels of $\sim$50\% over the domain.
    The method provides reliable data while preserving high spatial and temporal resolutions (1 km, daily).
    The resulting distribution is consistent with the literature for seasonal and inter-annual fluctuations, with maximum coverage in 2018 and minimum in 2016.
    This dataset will be useful for understanding the drivers of Sargassum dynamics at fine and large scale and validate future models.
    The methodology used here demonstrates the usefulness of contextual features for complementing classical remote sensing approaches.
    Our model could easily be adapted to other datasets containing erroneous detections.},
}