% Journal article record (Diversity and Distributions, vol. 24, issue 11, 2018).
% Cleaned from an automated export:
%   - the issue is stored in the standard `number` field (the export used the
%     unrecognised name `numero`, which styles silently ignore);
%   - empty `editor` / `booktitle` fields were dropped (not applicable to an
%     article and a source of "empty field" warnings);
%   - per-letter brace protection was removed from all fields;
%   - keywords are comma-separated so each one is an individual keyword.
% Author given names are kept exactly as supplied (initials where the export
% only had initials).
@article{fdi:010074154,
  author   = {Derville, S. and Torres, L. G. and Iovan, Corina and Garrigue, Claire},
  title    = {Finding the right fit: comparative cetacean distribution models using multiple data sources and statistical approaches},
  journal  = {Diversity and Distributions},
  volume   = {24},
  number   = {11},
  pages    = {1657--1673},
  year     = {2018},
  issn     = {1366-9516},
  doi      = {10.1111/ddi.12782},
  url      = {https://www.documentation.ird.fr/hor/fdi:010074154},
  language = {english},
  keywords = {citizen science, generalized regression, humpback whales, machine learning, species distribution models, support vector machines, NOUVELLE CALEDONIE, PACIFIQUE},
  abstract = {Aim: Accurate predictions of cetacean distributions are essential to their conservation but are limited by statistical challenges and a paucity of data. This study aimed at comparing the capacity of various statistical algorithms to deal with biases commonly found in nonsystematic cetacean surveys and to evaluate the potential for citizen science data to improve habitat modelling and predictions. An endangered population of humpback whales (Megaptera novaeangliae) in their breeding ground was used as a case study. Location: New Caledonia, Oceania. Methods: Five statistical algorithms were used to model the habitat preferences of humpback whales from 1,360 sightings collected over 14 years of nonsystematic research surveys. Three different background sampling approaches were tested when developing models from 625 crowdsourced sightings to assess methods accounting for citizen science spatial sampling bias. Model evaluation was conducted through cross-validation and prediction to an independent satellite tracking dataset. Results: Algorithms differed in complexity of the environmental relationships modelled, ecological interpretability and transferability. While parameter tuning had a great effect on model performances, GLMs generally had low predictive performance, SVMs were particularly hard to interpret, and BRTs had high descriptive power but showed signs of overfitting. MAXENT and especially GAMs provided a valuable complexity trade-off, accurate predictions and were ecologically intelligible. Models showed that humpback whales favoured cool (22-23 degrees C) and shallow waters (0-100 m deep) in coastal as well as offshore areas. Citizen science models converged with research survey models, specifically when accounting for spatial sampling bias. Main conclusions: Marine megafauna distribution models present specific challenges that may be addressed through integrative evaluation, independent testing and appropriately tuned statistical algorithms. Specifically, controlling overfitting is a priority when predicting cetacean distributions for large-scale conservation perspectives. Citizen science data appear to be a powerful tool to describe cetacean habitat.},
}