@article{fdi:010095035,
  title    = {{SIM-Net}: A multimodal fusion network using inferred {3D} object shape point clouds from {RGB} images for {2D} classification},
  author   = {Sklab, Youcef and Ariouat, H. and Chenin, Eric and Prifti, Edi and Zucker, Jean-Daniel},
  language = {English},
  abstract = {We introduce the Shape-Image Multimodal Network (SIM-Net), a novel 2D image classification architecture that integrates 3D point cloud representations inferred directly from RGB images. Our key contribution lies in a pixel-to-point transformation that converts 2D object masks into 3D point clouds, enabling the fusion of texture-based and geometric features for enhanced classification performance. SIM-Net is particularly well suited to the classification of digitised herbarium specimens, a task made challenging by heterogeneous backgrounds, non-plant elements, and occlusions that compromise conventional image-based models. To address these issues, SIM-Net employs a segmentation-based preprocessing step to extract object masks prior to 3D point cloud generation. The architecture comprises a CNN encoder for 2D image features and a PointNet-based encoder for geometric features, which are fused into a unified latent space. Experimental evaluations on herbarium datasets demonstrate that SIM-Net consistently outperforms ResNet101, achieving gains of up to 9.9% in accuracy and 12.3% in F-score. It also surpasses several transformer-based state-of-the-art architectures, highlighting the benefits of incorporating 3D structural reasoning into 2D image classification tasks.},
  keywords = {botany; computer vision; image classification; image processing; image representation; learning (artificial intelligence); neural net; architecture; neural nets; object recognition},
  journal  = {IET Computer Vision},
  volume   = {19},
  number   = {1},
  pages    = {e70036 [18 p.]},
  issn     = {1751-9632},
  year     = {2025},
  doi      = {10.1049/cvi2.70036},
  url      = {https://www.documentation.ird.fr/hor/fdi:010095035},
}