@incollection{fdi:010090983,
  title     = {{M2-Mixer}: a multimodal mixer with multi-head loss for classification from multimodal data},
  author    = {Bezirganyan, G. and Sellami, S. and Berti-Equille, Laure and Fournier, S.},
  language  = {ENG},
  abstract  = {In this paper, we propose M2-Mixer, an MLP-Mixer-based architecture with a multi-head loss for multimodal classification. It achieves better performance than convolutional, recurrent, or neural-architecture-search-based baseline models, with the main advantage of conceptual and computational simplicity. The proposed multi-head loss function addresses the problem of modality predominance (i.e., when one modality is favored over the others by the training algorithm). Our experiments demonstrate that our multimodal mixer architecture, combined with the multi-head loss function, outperforms the baseline models on two benchmark multimodal datasets, AVMNIST and MIMIC-III, with, on average, +0.43% in accuracy and a 6.4-fold reduction in training time on the former, and +0.33% in accuracy and a 13.3-fold reduction in training time on the latter, compared with the previous best-performing models.},
  booktitle = {2023 IEEE International Conference on Big Data},
  pages     = {1052--1058},
  address   = {Piscataway},
  publisher = {IEEE},
  year      = {2023},
  doi       = {10.1109/BigData59044.2023.10386252},
  isbn      = {979-8-3503-2446-4},
  url       = {https://www.documentation.ird.fr/hor/fdi:010090983},
}
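
The abstract describes a multi-head loss that combines per-modality classification heads with a fused head so that no single modality dominates training. The following is a minimal sketch of that idea, not the authors' implementation: it assumes PyTorch, and the head names ("image", "audio", "fused") and weights are hypothetical placeholders chosen for illustration. BibTeX ignores text outside entries, so this note does not affect parsing of the record above.

# Hypothetical sketch of a multi-head loss in the spirit of the paper's
# description; head names and weights are illustrative assumptions.
import torch
import torch.nn as nn

class MultiHeadLoss(nn.Module):
    # One classification head per modality plus one fused head; the total
    # loss is a weighted sum of per-head cross-entropies, so the gradient
    # signal is balanced across modalities rather than dominated by one.
    def __init__(self, head_weights):
        super().__init__()
        self.head_weights = head_weights  # e.g. {"image": 1.0, "audio": 1.0, "fused": 2.0}
        self.ce = nn.CrossEntropyLoss()

    def forward(self, logits_per_head, target):
        # logits_per_head: dict mapping head name -> (batch, num_classes) logits
        total = 0.0
        for name, logits in logits_per_head.items():
            total = total + self.head_weights.get(name, 1.0) * self.ce(logits, target)
        return total

# Usage sketch: assume per-modality encoders have already produced logits.
if __name__ == "__main__":
    batch, num_classes = 8, 10
    logits = {
        "image": torch.randn(batch, num_classes),
        "audio": torch.randn(batch, num_classes),
        "fused": torch.randn(batch, num_classes),
    }
    target = torch.randint(0, num_classes, (batch,))
    loss_fn = MultiHeadLoss({"image": 1.0, "audio": 1.0, "fused": 2.0})
    print(loss_fn(logits, target))

Weighting the fused head more heavily than the per-modality heads is one plausible way to keep the joint prediction primary while still forcing each modality to carry signal; the paper's actual weighting scheme may differ.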