% Journal article (eLife, vol. 11, 2022) presenting NNoVAE, a species-assignment
% method for Anopheles mosquitoes based on k-mers from ANOSPP amplicon sequences.
% The url field points to the catalogue record at documentation.ird.fr; the doi
% field identifies the article itself.
% Notes for maintainers:
%   - Names are written without per-letter braces. With a braced first letter,
%     BibTeX takes the case of a word from its first brace-depth-0 letter, so
%     "Le" in "Le Goff" would be parsed as a lowercase particle (von part).
%   - doi and issn are stored bare (no braces inside the value), since many
%     tools treat these fields verbatim.
%   - pages holds the article identifier only; the page count is in pagetotal
%     (a biblatex field, ignored by classic BibTeX styles).
%   - Percent signs in the abstract are escaped so the text is safe to typeset.
@article{fdi:010086976,
  author    = {Bodd{\'e}, M. and Makunin, A. and Ayala, Diego and Bouafou, L. and
               Diabate, A. and Ekpo, U. F. and Kientega, M. and
               Le Goff, Gilbert and Makanga, B. K. and Ngangue, M. F. and
               Omitola, O. O. and Rahola, Nil and Tripet, F. and Durbin, R. and
               Lawniczak, M. K. N.},
  title     = {High-resolution species assignment of {Anopheles} mosquitoes
               using k-mer distances on targeted sequences},
  journal   = {{eLife}},
  volume    = {11},
  pages     = {e78775},
  pagetotal = {40},
  year      = {2022},
  issn      = {2050-084X},
  doi       = {10.7554/eLife.78775},
  url       = {https://www.documentation.ird.fr/hor/fdi:010086976},
  language  = {ENG},
  keywords  = {species assignment ; anopheles ; k-mers ; nearest neighbour ; VAE ; Other},
  abstract  = {The ANOSPP amplicon panel is a genus-wide targeted sequencing
               panel to facilitate large-scale monitoring of Anopheles species
               diversity. Combining information from the 62 nuclear amplicons
               present in the ANOSPP panel allows for a more sensitive and
               specific species assignment than single gene (e.g. COI)
               barcoding, which is desirable in the light of permeable species
               boundaries. Here, we present NNoVAE, a method using Nearest
               Neighbours (NN) and Variational Autoencoders (VAE), which we
               apply to k-mers resulting from the ANOSPP amplicon sequences in
               order to hierarchically assign species identity. The NN step
               assigns a sample to a species-group by comparing the k-mers
               arising from each haplotype's amplicon sequence to a reference
               database. The VAE step is required to distinguish between
               closely related species, and also has sufficient resolution to
               reveal population structure within species. In tests on
               independent samples with over 80\% amplicon coverage, NNoVAE
               correctly classifies to species level 98\% of samples within
               the An. gambiae complex and 89\% of samples outside the complex.
               We apply NNoVAE to over two thousand new samples from Burkina
               Faso and Gabon, identifying unexpected species in Gabon. NNoVAE
               presents an approach that may be of value to other targeted
               sequencing panels, and is a method that will be used to survey
               Anopheles species diversity and Plasmodium transmission patterns
               through space and time on a large scale, with plans to analyse
               half a million mosquitoes in the next five years.},
}