<?xml version="1.0" encoding="UTF-8"?>
<xml>
  <records>
    <record>
      <source-app name="Horizon">Horizon</source-app>
      <rec-number>1</rec-number>
      <foreign-keys>
        <key app="Horizon" db-id="fdi:010091277">1</key>
      </foreign-keys>
      <ref-type name="Journal Article">17</ref-type>
      <work-type>ACL : Articles dans des revues avec comité de lecture répertoriées par l'AERES</work-type>
      <contributors>
        <authors>
          <author>
            <style face="normal" font="default" size="100%">Nainia, A.</style>
          </author>
          <author>
            <style face="normal" font="default" size="100%">Vignes-Lebbe, R.</style>
          </author>
          <author>
            <style face="bold" font="default" size="100%">Chenin, Eric</style>
          </author>
          <author>
            <style face="normal" font="default" size="100%">Sahraoui, M.</style>
          </author>
          <author>
            <style face="normal" font="default" size="100%">Mousannif, H.</style>
          </author>
          <author>
            <style face="normal" font="default" size="100%">Zahir, J.</style>
          </author>
        </authors>
      </contributors>
      <titles>
        <title>FloraNER : a new dataset for species and morphological terms named entity recognition in French botanical text</title>
        <secondary-title>Data in Brief</secondary-title>
      </titles>
      <pages>110824 [10 p.]</pages>
      <keywords>
        <keyword>NER Dataset</keyword>
        <keyword>Biodiversity dataset</keyword>
        <keyword>Species identification dataset</keyword>
        <keyword>Plant morphology dataset</keyword>
        <keyword>NOUVELLE CALEDONIE</keyword>
      </keywords>
      <dates>
        <year>2024</year>
      </dates>
      <call-num>fdi:010091277</call-num>
      <language>ENG</language>
      <periodical>
        <full-title>Data in Brief</full-title>
      </periodical>
      <isbn>2352-3409</isbn>
      <accession-num>ISI:001299243700001</accession-num>
      <electronic-resource-num>10.1016/j.dib.2024.110824</electronic-resource-num>
      <urls>
        <related-urls>
          <url>https://www.documentation.ird.fr/hor/fdi:010091277</url>
        </related-urls>
        <pdf-urls>
          <url>https://horizon.documentation.ird.fr/exl-doc/pleins_textes/2024-10/010091277.pdf</url>
        </pdf-urls>
      </urls>
      <volume>56</volume>
      <remote-database-provider>Horizon (IRD)</remote-database-provider>
      <abstract>FloraNER is a distantly supervised named entity recognition dataset (NER). The dataset is built from botanical French literature extracted from the OCR-preprocessed flora of New Caledonia, provided by the National Museum of Natural History in France (MNHN), and distantly annotated with a botanical French corpus created by merging botanical lexicons available online. FloraNER comprises separate subdatasets for the recognition of plant species names, as well as coarse-grained and fine-grained botanical morphological terms. The resulting datasets are in CSV format, displaying textual data, identified named entities, and their annotations, covering one named entity type "Species" (Espèce in French) for species name identification, two named entity types "Organ" and "Descriptor" for coarse-grained morphological term identification, and eight named entity types for fine-grained morphological term identification: Organ, Descriptor, Form, Color, Development, Structure, Surface, Position, Disposition, and Measure. This dataset can be utilized to train and evaluate named entity recognition models for extracting information from botanical French literature.</abstract>
      <custom6>076 ; 124</custom6>
      <custom1>UR209</custom1>
      <custom7>Maroc</custom7>
    </record>
  </records>
</xml>
