% Journal article: FrangiPANe panreference pipeline (NAR Genomics and Bioinformatics, vol. 5, 2023).
% `pages` holds the article number; `pagetotal` holds the page count from the source record.
@article{fdi:010090913,
  author    = {Dubreuil Tranchant, Christine and Chenal, Clothilde and Blaison, M. and Albar, Laurence and Klein, V. and Mariac, C{\'e}dric and Wing, R. A. and Vigouroux, Yves and Sabot, Fran{\c{c}}ois},
  title     = {{FrangiPANe}, a tool for creating a panreference using left behind reads},
  journal   = {{NAR} Genomics and Bioinformatics},
  volume    = {5},
  number    = {1},
  pages     = {lqad013},
  pagetotal = {8},
  year      = {2023},
  language  = {english},
  issn      = {2631-9268},
  doi       = {10.1093/nargab/lqad013},
  url       = {https://www.documentation.ird.fr/hor/fdi:010090913},
  abstract  = {We present here FrangiPANe, a pipeline developed to build panreference using short reads through a map-then-assemble strategy. Applying it to 248 African rice genomes using an improved CG14 reference genome, we identified an average of 8 Mb of new sequences and 5290 new contigs per individual. In total, 1.4 G of new sequences, consisting of 1 306 676 contigs, were assembled. We validated 97.7\% of the contigs of the TOG5681 cultivar individual assembly from short reads on a newly long reads genome assembly of the same TOG5681 cultivar. FrangiPANe also allowed the anchoring of 31.5\% of the new contigs within the CG14 reference genome, with a 92.5\% accuracy at 2 kb span. We annotated in addition 3252 new genes absent from the reference. FrangiPANe was developed as a modular and interactive application to simplify the construction of a panreference using the map-then-assemble approach. It is available as a Docker image containing (i) a Jupyter notebook centralizing codes, documentation and interactive visualization of results, (ii) python scripts and (iii) all the software and libraries requested for each step of the analysis. We foreseen our approach will help leverage large-scale illumina dataset for pangenome studies in GWAS or detection of selection.},
}