% Journal article record (IRD Horizon export, see the url field), cleaned up:
% issue number moved from the non-standard `numero` field to `number`, empty
% `editor`/`booktitle` placeholders dropped, single-letter brace protection removed.
% NOTE(review): given names are only available as initials for the first two
% authors; expand them if the full names are known.
% NOTE(review): the surname "Mustiere" may have lost an accent in export
% (possibly Musti{\`e}re) -- confirm against the publisher record before changing.
@article{fdi:010049800,
  author   = {Sheeren, D. and Mustiere, S. and Zucker, Jean-Daniel},
  title    = {A data-mining approach for assessing consistency between multiple representations in spatial databases},
  journal  = {International Journal of Geographical Information Science},
  volume   = {23},
  number   = {8},
  pages    = {961--992},
  year     = {2009},
  issn     = {1365-8816},
  doi      = {10.1080/13658810701791949},
  url      = {https://www.documentation.ird.fr/hor/fdi:010049800},
  language = {ENG},
  keywords = {Data mining ; Inconsistency ; Integration ; Metadata ; Multiple representation ; Spatial data matching},
  abstract = {When different spatial databases are combined, an important issue is the identification of inconsistencies between data. Quite often, representations of the same geographical entities in databases are different and reflect different points of view. In order to fully take advantage of these differences when object instances are associated, a key issue is to determine whether the differences are normal, i.e. explained by the database specifications, or if they are due to erroneous or outdated data in one database. In this paper, we propose a knowledge-based approach to partially automate the consistency assessment between multiple representations of data. The inconsistency detection is viewed as a knowledge-acquisition problem, the source of knowledge being the data. The consistency assessment is carried out by applying a proposed method called {MECO}. This method is itself parameterized by some domain knowledge obtained from a second method called {MACO}. {MACO} supports two approaches (direct or indirect) to perform the knowledge acquisition using data-mining techniques. In particular, a supervised learning approach is defined to automate the knowledge acquisition so as to drastically reduce the human-domain expert's work. Thanks to this approach, the knowledge-acquisition process is sped up and less expert-dependent. Training examples are obtained automatically upon completion of the spatial data matching. Knowledge extraction from data following this bottom-up approach is particularly useful, since the database specifications are generally complex, difficult to analyse, and manually encoded. Such a data-driven process also sheds some light on the gap between textual specifications and those actually used to produce the data. The methodology is illustrated and experimentally validated by comparing geometrical representations and attribute values of different vector spatial databases. The advantages and limits of such partially automatic approaches are discussed, and some future works are suggested.},
}