% Conference paper from the IEEE ICHI 2025 proceedings (IRD Horizon record fdi:010096322).
% Typed as inproceedings because the venue is a conference proceedings, not a journal.
% Only acronyms are brace-protected; the o-umlaut in the first author's surname uses
% the BibTeX special-character form so that sorting and label generation work.
@inproceedings{fdi:010096322,
  author    = {T{\"o}rnqvist, M. and Zucker, Jean-Daniel and Fauvel, T. and Lambert, N. and Berthelot, M. and Movschin, A.},
  title     = {A text-to-tabular approach to generate synthetic patient data using {LLMs}},
  booktitle = {2025 {IEEE} 13th International Conference on Healthcare Informatics ({ICHI})},
  pages     = {18--29},
  year      = {2025},
  issn      = {2575-2634},
  doi       = {10.1109/ichi64645.2025.00011},
  url       = {https://www.documentation.ird.fr/hor/fdi:010096322},
  language  = {ENG},
  keywords  = {GenAI ; Synthetic data generation ; LLM},
  abstract  = {Access to large-scale high-quality healthcare databases is key to accelerate
               medical research and make insightful discoveries about diseases. However,
               access to such data is often limited by patient privacy concerns, data sharing
               restrictions and high costs. To overcome these limitations, synthetic patient
               data has emerged as an alternative. However, synthetic data generation (SDG)
               methods typically rely on machine learning (ML) models trained on original
               data, leading to the data scarcity problem. We propose an approach to generate
               synthetic tabular patient data that does not require access to the original
               data, but only a description of the desired database. We harness prior medical
               knowledge and the in-context learning capabilities of large language models
               (LLMs) to perform zero-shot generation of realistic patient data, even in
               low-resource settings. We quantitatively evaluate our approach against
               state-of-the-art SDG Models, using fidelity, privacy, and utility metrics. Our
               results show that while LLMs may not match the performance of state-of-the-art
               models trained on the original data, they effectively generate realistic
               patient data with well-preserved clinical correlations. An ablation study
               highlights key elements of our prompt that contribute to the generation of
               high-quality synthetic patient data. This approach, which is easy to use and
               does not require original data or advanced ML skills, is particularly valuable
               for quickly generating custom-designed patient data, supporting project
               implementation, and providing educational resources.},
}