@comment{Journal article record. The citation key PAR00022083 is the same
  identifier that ends the url field. Only Sentinel-2 and Level 2A are
  brace-protected in the title so styles may recase the remaining words.}
@article{PAR00022083,
  author    = {Cordeiro, M. C. R. and Martinez, Jean-Michel and Pena-Luque, S.},
  title     = {Automatic water detection from multidimensional hierarchical clustering for {Sentinel-2} images and a comparison with {Level 2A} processors},
  journal   = {Remote Sensing of Environment},
  volume    = {253},
  pages     = {112209},
  pagetotal = {17},
  year      = {2021},
  issn      = {0034-4257},
  doi       = {10.1016/j.rse.2020.112209},
  url       = {https://www.documentation.ird.fr/hor/PAR00022083},
  language  = {ENG},
  keywords  = {Water detection ; Water mask ; Sentinel-2 ; Unsupervised clustering ; Machine learning ; naive bayes classifier ; FRANCE},
  abstract  = {Continuous monitoring of water surfaces is essential for water resource management. This study presents a nonparametric unsupervised automatic algorithm for the identification of inland water pixels from multispectral satellite data using multidimensional clustering and a high-performance subsampling approach for large scenes. Clustering analysis is a technique that is used to identify similar samples in a multidimensional data space. The spectral information and derived indices were used to characterize each scene pixel individually. A machine learning approach with random subsampling and generalization through a Na{\"i}ve Bayes classifier was also proposed to make the application of complex algorithms to large scenes feasible. Accuracy was evaluated using an independent dataset that provides water bodies in 15 Sentinel-2 images over France acquired in different seasons and that covers a large range of water bodies and water colour types. The validation dataset covers a water surface of more than 1200 km$^2$ (approximately 12 million pixels) including over 80,000 water bodies outlined using a semiautomatic active learning method, which were manually revised. The classification results were compared to the water pixel classification using three of the major Level 2A processors (MAJA, Sen2Cor and FMask) and two of the most common thresholding techniques: Otsu and Canny-edge. An input mask was used to remove coastal waters, clouds, shadows and snow pixels. Water pixels were identified automatically from the clustering process without the need for ancillary or pretrained data. Combinations using up to three water indices (Modified Normalized Difference Water Index-MNDWI, Normalized Difference Water Index-NDWI and Multiband Water Index-MBWI) and two reflectance bands (B8 and B12) were tested in the algorithm, and the best combination was NDWI-B12. Of all the methods, our method achieved the highest mean kappa score, 0.874, across all tested scenes, with a per-scene kappa ranging from 0.608 to 0.980, and the lowest mean standard deviation of 0.091. Standard Otsu's thresholding had the worst performance due to the lack of a bimodal histogram, and the Canny-edge variation achieved an overall kappa of 0.718 when used with the MNDWI. For water masks provided by generic processors, FMask outperformed MAJA and Sen2Cor and obtained an overall kappa of 0.764. In-depth analysis shows a quick drop in performance for all of the methods in identifying water bodies with a surface area below 0.5 ha, but the proposed approach outperformed the second best method by 34\% in this size class.},
}