% Journal article record. The citation key is the same record id that ends the url field.
% Given names are stored as they appear in the source record (mostly initials).
@article{PAR00014432,
  author   = {Verneau, J. and Levasseur, A. and Raoult, Didier and La Scola, B. and Colson, P.},
  title    = {{MG-Digger}: an automated pipeline to search for giant virus-related sequences in metagenomes},
  journal  = {Frontiers in Microbiology},
  volume   = {7},
  pages    = {428},
  year     = {2016},
  issn     = {1664-302X},
  doi      = {10.3389/fmicb.2016.00428},
  url      = {https://www.documentation.ird.fr/hor/PAR00014432},
  language = {ENG},
  keywords = {metagenomes ; giant virus ; Megavirales ; bioinformatics ; pipeline ; mimivirus},
  abstract = {The number of metagenomic studies conducted each year is growing dramatically. Storage and analysis of such big data is difficult and time-consuming. Interestingly, analysis shows that environmental and human metagenomes include a significant amount of non-annotated sequences, representing a 'dark matter.' We established a bioinformatics pipeline that automatically detects metagenome reads matching query sequences from a given set and applied this tool to the detection of sequences matching large and giant DNA viral members of the proposed order Megavirales or virophages. A total of 1,045 environmental and human metagenomes (approximate to Terabase) were collected, processed, and stored on our bioinformatics server. In addition, nucleotide and protein sequences from 93 Megavirales representatives, including 19 giant viruses of amoeba, and 5 virophages, were collected. The pipeline was generated by scripts written in Python language and entitled MG-Digger. Metagenomes previously found to contain megavirus-like sequences were tested as controls. MG-Digger was able to annotate 100s of metagenome sequences as best matching those of giant viruses. These sequences were most often found to be similar to phycodnavirus or mimivirus sequences, but included reads related to recently available pandoraviruses, Pithovirus sibericum, and faustoviruses. Compared to other tools, MG-Digger combined stand-alone use on Linux or Windows operating systems through a user-friendly interface, implementation of ready-to-use customized metagenome databases and query sequence databases, adjustable parameters for BLAST searches, and creation of output files containing selected reads with best match identification. Compared to Metavir 2, a reference tool in viral metagenome analysis, MG-Digger detected 8\% more true positive Megavirales-related reads in a control metagenome. The present work shows that massive, automated and recurrent analyses of metagenomes are effective in improving knowledge about the presence and prevalence of giant viruses in the environment and the human body.},
}