% Journal article record; the citation key matches the record identifier
% that appears in the url field below.
% NOTE(review): the abstract's last sentence stops at "available at" -- the
% link is missing from the exported record; restore it from the publisher's
% version if the abstract is ever typeset.
@article{fdi:010080396,
  author   = {Kar, S. and Garin, V. and Kholova, J. and Vadez, Vincent and Durbha, S. S. and Tanaka, R. and Iwata, H. and Urban, M. O. and Adinarayana, J.},
  title    = {{SpaTemHTP}: A Data Analysis Pipeline for Efficient Processing and Utilization of Temporal High-Throughput Phenotyping Data},
  journal  = {Frontiers in Plant Science},
  year     = {2020},
  volume   = {11},
  pages    = {552509},
  note     = {16 p.},
  issn     = {1664-462X},
  language = {ENG},
  keywords = {high-throughput phenotyping, SpATS, cross-validation, simulation, change-point analysis, HTP-pipeline},
  abstract = {The rapid development of phenotyping technologies over the last years gave the opportunity to study plant development over time. The treatment of the massive amount of data collected by high-throughput phenotyping (HTP) platforms is however an important challenge for the plant science community. An important issue is to accurately estimate, over time, the genotypic component of plant phenotype. In outdoor and field-based HTP platforms, phenotype measurements can be substantially affected by data-generation inaccuracies or failures, leading to erroneous or missing data. To solve that problem, we developed an analytical pipeline composed of three modules: detection of outliers, imputation of missing values, and mixed-model genotype adjusted means computation with spatial adjustment. The pipeline was tested on three different traits (3D leaf area, projected leaf area, and plant height), in two crops (chickpea, sorghum), measured during two seasons. Using real-data analyses and simulations, we showed that the sequential application of the three pipeline steps was particularly useful to estimate smooth genotype growth curves from raw data containing a large amount of noise, a situation that is potentially frequent in data generated on outdoor HTP platforms. The procedure we propose can handle up to 50\% of missing values. It is also robust to data contamination rates between 20 and 30\% of the data. The pipeline was further extended to model the genotype time series data. A change-point analysis allowed the determination of growth phases and the optimal timing where genotypic differences were the largest. The estimated genotypic values were used to cluster the genotypes during the optimal growth phase. Through a two-way analysis of variance (ANOVA), clusters were found to be consistently defined throughout the growth duration. Therefore, we could show, on a wide range of scenarios, that the pipeline facilitated efficient extraction of useful information from outdoor HTP platform data. High-quality plant growth time series data is also provided to support breeding decisions. The R code of the pipeline is available at},
  url      = {https://www.documentation.ird.fr/hor/fdi:010080396},
}