% Orozco-Arias et al. (2021), PeerJ 9:e11456 -- k-mer-based ML classification of
% LTR retrotransposons in plant genomes. Record key and URL point to the
% documentation.ird.fr catalogue entry this item was exported from.
% Author given names are kept as initials where the export did not supply them.
@article{fdi:010081522,
  author    = {Orozco-Arias, S. and Candamil-Cortes, M. S. and Jaimes, P. A. and Pina, J. S. and Tabares-Soto, R. and Guyot, Romain and Isaza, G.},
  title     = {K-mer-based machine learning method to classify {LTR}-retrotransposons in plant genomes},
  journal   = {PeerJ},
  volume    = {9},
  pages     = {e11456},
  pagetotal = {20},
  year      = {2021},
  issn      = {2167-8359},
  doi       = {10.7717/peerj.11456},
  url       = {https://www.documentation.ird.fr/hor/fdi:010081522},
  language  = {english},
  keywords  = {Transposable elements, LTR retrotransposons, Plant genomes, Machine learning, Classification, Free-alignment approach, k-mer based method},
  abstract  = {Every day more plant genomes are available in public databases and additional massive sequencing projects (i.e., that aim to sequence thousands of individuals) are formulated and released. Nevertheless, there are not enough automatic tools to analyze this large amount of genomic information. LTR retrotransposons are the most frequent repetitive sequences in plant genomes; however, their detection and classification are commonly performed using semi-automatic and time-consuming programs. Despite the availability of several bioinformatic tools that follow different approaches to detect and classify them, none of these tools can individually obtain accurate results. Here, we used Machine Learning algorithms based on k-mer counts to classify LTR retrotransposons from other genomic sequences and into lineages/families with an F1-Score of 95\%, contributing to develop a free-alignment and automatic method to analyze these sequences.},
}