<?xml version="1.0"?>
<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd">
  <dc:title>FloraNER : a new dataset for species and morphological terms named entity recognition in French botanical text</dc:title>
  <dc:creator>Nainia, A.</dc:creator>
  <dc:creator>Vignes-Lebbe, R.</dc:creator>
  <dc:creator>Chenin, Eric</dc:creator>
  <dc:creator>Sahraoui, M.</dc:creator>
  <dc:creator>Mousannif, H.</dc:creator>
  <dc:creator>Zahir, J.</dc:creator>
  <dc:subject>NER Dataset</dc:subject>
  <dc:subject>Biodiversity dataset</dc:subject>
  <dc:subject>Species identification dataset</dc:subject>
  <dc:subject>Plant morphology dataset</dc:subject>
  <dc:description>FloraNER is a distantly supervised named entity recognition dataset (NER). The dataset is built from botanical French literature extracted from the OCR-preprocessed flora of New Caledonia, provided by the National Museum of Natural History in France (MNHN), and distantly annotated with a botanical French corpus created by merging botanical lexicons available online. FloraNER comprises separate subdatasets for the recognition of plant species names, as well as coarse-grained and fine-grained botanical morphological terms. The resulting datasets are in CSV format, displaying textual data, identified named entities, and their annotations, covering one named entity type "Species" (Espèce in French) for species name identification, two named entity types "Organ" and "Descriptor" for coarse-grained morphological term identification, and eight named entity types for fine-grained morphological term identification: Organ, Descriptor, Form, Color, Development, Structure, Surface, Position, Disposition, and Measure. This dataset can be utilized to train and evaluate named entity recognition models for extracting information from botanical French literature.</dc:description>
  <dc:date>2024</dc:date>
  <dc:type>text</dc:type>
  <dc:identifier>https://www.documentation.ird.fr/hor/fdi:010091277</dc:identifier>
  <dc:identifier>fdi:010091277</dc:identifier>
  <dc:identifier>Nainia A., Vignes-Lebbe R., Chenin Eric, Sahraoui M., Mousannif H., Zahir J.. FloraNER : a new dataset for species and morphological terms named entity recognition in French botanical text. 2024, 56,  110824 [10 p.]</dc:identifier>
  <dc:language>EN</dc:language>
  <dc:coverage>NOUVELLE CALEDONIE</dc:coverage>
</oai_dc:dc>
