% Conference paper from the ACM Web Conference 2025 (WWW 2025) proceedings.
% Citation key is the IRD Horizon record identifier and is kept unchanged.
% NOTE(review): the export listed "Association for Computing Machinery" among
% the authors; it is recorded as the publisher here -- confirm against the DOI
% landing page. Given names are left as initials where the export supplied
% only initials.
@inproceedings{fdi:010094312,
  author    = {Jradeh, C. K. and Raoufi, E. and David, J. and Larmande, Pierre and Scharffe, F. and Todorov, K. and Trojahn, C.},
  title     = {Graph embeddings meet link keys discovery for entity matching},
  booktitle = {Proceedings of the {ACM} Web Conference 2025, {WWW} 2025},
  publisher = {Association for Computing Machinery},
  pages     = {3344--3353},
  year      = {2025},
  doi       = {10.1145/3696410.3714581},
  url       = {https://www.documentation.ird.fr/hor/fdi:010094312},
  language  = {english},
  keywords  = {Entity matching, Knowledge graphs, Link keys, Embedding-based EM, Symbolic EM, Graph embeddings, Language models, Hybrid AI},
  abstract  = {Entity Matching (EM) automates the discovery of identity links
               between entities within different Knowledge Graphs (KGs). Link
               keys are crucial for EM, serving as rules allowing to identify
               identity links across different KGs, possibly described using
               different ontologies. However, the approach for extracting link
               keys struggles to scale on large KGs. While embedding-based EM
               methods efficiently handle large KGs they lack explainability.
               This paper proposes a novel hybrid EM approach to guarantee the
               scalability link key extraction approach and improve the
               explainability of embedding-based EM methods. First,
               embedding-based EM approaches are used to sample the KGs based
               on the identity links they generate, thereby reducing the search
               space to relevant sub-graphs for link key extraction. Second,
               rules (in the form of link keys) are extracted to explain the
               generation of identity links by the embedding-based methods.
               Experimental results demonstrate that the proposed approach
               allows link key extraction to scale on large KGs, preserving the
               quality of the extracted link keys. Additionally, it shows that
               link keys can improve the explainability of the identity links
               generated by embedding-methods, allowing for the regeneration of
               77\% of the identity links produced for a specific EM task,
               thereby providing an approximation of the reasons behind their
               generation.},
}