<?xml version="1.0" encoding="utf-8"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:hal="http://hal.archives-ouvertes.fr/" xmlns:gml="http://www.opengis.net/gml/3.3/" xmlns:gmlce="http://www.opengis.net/gml/3.3/ce" version="1.1" xsi:schemaLocation="http://www.tei-c.org/ns/1.0 http://api.archives-ouvertes.fr/documents/aofr-sword.xsd">
  <teiHeader>
    <fileDesc>
      <titleStmt>
        <title>HAL TEI export of hal-03472117</title>
      </titleStmt>
      <publicationStmt>
        <distributor>CCSD</distributor>
        <availability status="restricted">
          <licence target="https://creativecommons.org/publicdomain/zero/1.0/">CC0 1.0 - Universal</licence>
        </availability>
        <date when="2026-05-17T15:55:19+02:00"/>
      </publicationStmt>
      <sourceDesc>
        <p part="N">HAL API Platform</p>
      </sourceDesc>
    </fileDesc>
  </teiHeader>
  <text>
    <body>
      <listBibl>
        <biblFull>
          <titleStmt>
            <title xml:lang="en">Investigating the Impact of Gender Representation in ASR Training Data: a Case Study on Librispeech</title>
            <author role="aut">
              <persName>
                <forename type="first">Mahault</forename>
                <surname>Garnerin</surname>
              </persName>
              <email type="md5">2bda8a80065fb0ce5148a43f5d45868f</email>
              <email type="domain">gmail.com</email>
              <idno type="idhal" notation="numeric">1072412</idno>
              <idno type="halauthorid" notation="string">1395971-1072412</idno>
              <idno type="IDREF">https://www.idref.fr/263663051</idno>
              <idno type="ORCID">https://orcid.org/0009-0006-5614-5526</idno>
              <affiliation ref="#struct-1043313"/>
              <affiliation ref="#struct-1043301"/>
              <affiliation ref="#struct-1043147"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Solange</forename>
                <surname>Rossato</surname>
              </persName>
              <idno type="halauthorid">43971-0</idno>
              <affiliation ref="#struct-1043313"/>
              <affiliation ref="#struct-1043301"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Laurent</forename>
                <surname>Besacier</surname>
              </persName>
              <idno type="halauthorid">7533-0</idno>
              <affiliation ref="#struct-1043313"/>
            </author>
            <editor role="depositor">
              <persName>
                <forename>Mahault</forename>
                <surname>Garnerin</surname>
              </persName>
              <email type="md5">fd6f539b5c096f0b6681d5edd15fa03f</email>
              <email type="domain">univ-grenoble-alpes.fr</email>
            </editor>
          </titleStmt>
          <editionStmt>
            <edition n="v1" type="current">
              <date type="whenSubmitted">2021-12-09 10:42:22</date>
              <date type="whenModified">2025-09-27 19:59:47</date>
              <date type="whenReleased">2021-12-22 11:24:52</date>
              <date type="whenProduced">2021-08</date>
              <date type="whenEndEmbargoed">2021-12-09</date>
              <ref type="file" target="https://hal.univ-grenoble-alpes.fr/hal-03472117v1/document">
                <date notBefore="2021-12-09"/>
              </ref>
              <ref type="file" subtype="author" n="1" target="https://hal.univ-grenoble-alpes.fr/hal-03472117v1/file/garnerin-etal-camera-ready.pdf" id="file-3472117-3045664">
                <date notBefore="2021-12-09"/>
              </ref>
            </edition>
            <respStmt>
              <resp>contributor</resp>
              <name key="774879">
                <persName>
                  <forename>Mahault</forename>
                  <surname>Garnerin</surname>
                </persName>
                <email type="md5">fd6f539b5c096f0b6681d5edd15fa03f</email>
                <email type="domain">univ-grenoble-alpes.fr</email>
              </name>
            </respStmt>
          </editionStmt>
          <publicationStmt>
            <distributor>CCSD</distributor>
            <idno type="halId">hal-03472117</idno>
            <idno type="halUri">https://hal.univ-grenoble-alpes.fr/hal-03472117</idno>
            <idno type="halBibtex">garnerin:hal-03472117</idno>
            <idno type="halRefHtml">&lt;i&gt;3rd Workshop on Gender Bias in Natural Language Processing&lt;/i&gt;, Aug 2021, Online, France. pp.86-92, &lt;a target="_blank" href="https://dx.doi.org/10.18653/v1/2021.gebnlp-1.10"&gt;&amp;#x27E8;10.18653/v1/2021.gebnlp-1.10&amp;#x27E9;&lt;/a&gt;</idno>
            <idno type="halRef">3rd Workshop on Gender Bias in Natural Language Processing, Aug 2021, Online, France. pp.86-92, &amp;#x27E8;10.18653/v1/2021.gebnlp-1.10&amp;#x27E9;</idno>
            <availability status="restricted">
              <licence target="https://about.hal.science/hal-authorisation-v1/">HAL Authorization<ref corresp="#file-3472117-3045664"/></licence>
            </availability>
          </publicationStmt>
          <seriesStmt>
            <idno type="stamp" n="SHS">Sciences de l'Homme et de la Société</idno>
            <idno type="stamp" n="UGA">HAL Grenoble Alpes</idno>
            <idno type="stamp" n="CNRS">CNRS - Centre national de la recherche scientifique</idno>
            <idno type="stamp" n="INPG">Institut polytechnique de Grenoble</idno>
            <idno type="stamp" n="LIG">Laboratoire d'Informatique de Grenoble</idno>
            <idno type="stamp" n="AO-LINGUISTIQUE">Archives ouvertes de la Linguistique</idno>
            <idno type="stamp" n="LIG_TDCGE_GETALP" corresp="LIG_TDCGE">GETALP</idno>
            <idno type="stamp" n="LIDILEM">Laboratoire de Linguistique et Didactique des Langues Etrangères et Maternelles</idno>
            <idno type="stamp" n="TEST-HALCNRS">Collection test HAL CNRS</idno>
            <idno type="stamp" n="UGA-EPE">Université Grenoble Alpes [2020-*]</idno>
            <idno type="stamp" n="LIG_SIDCH" corresp="LIG">Systèmes intelligents pour les données, les connaissances et les humains</idno>
            <idno type="stamp" n="MEMO-SHS">Méditerranée en tensions, Monde en recomposition</idno>
            <idno type="stamp" n="TEST-UGA">TEST-UGA</idno>
          </seriesStmt>
          <notesStmt>
            <note type="audience" n="2">International</note>
            <note type="invited" n="0">No</note>
            <note type="popular" n="0">No</note>
            <note type="peer" n="1">Yes</note>
            <note type="proceedings" n="1">Yes</note>
          </notesStmt>
          <sourceDesc>
            <biblStruct>
              <analytic>
                <title xml:lang="en">Investigating the Impact of Gender Representation in ASR Training Data: a Case Study on Librispeech</title>
                <author role="aut">
                  <persName>
                    <forename type="first">Mahault</forename>
                    <surname>Garnerin</surname>
                  </persName>
                  <email type="md5">2bda8a80065fb0ce5148a43f5d45868f</email>
                  <email type="domain">gmail.com</email>
                  <idno type="idhal" notation="numeric">1072412</idno>
                  <idno type="halauthorid" notation="string">1395971-1072412</idno>
                  <idno type="IDREF">https://www.idref.fr/263663051</idno>
                  <idno type="ORCID">https://orcid.org/0009-0006-5614-5526</idno>
                  <affiliation ref="#struct-1043313"/>
                  <affiliation ref="#struct-1043301"/>
                  <affiliation ref="#struct-1043147"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Solange</forename>
                    <surname>Rossato</surname>
                  </persName>
                  <idno type="halauthorid">43971-0</idno>
                  <affiliation ref="#struct-1043313"/>
                  <affiliation ref="#struct-1043301"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Laurent</forename>
                    <surname>Besacier</surname>
                  </persName>
                  <idno type="halauthorid">7533-0</idno>
                  <affiliation ref="#struct-1043313"/>
                </author>
              </analytic>
              <monogr>
                <title level="m">Proceedings of the 3rd Workshop on Gender Bias in Natural Language Processing</title>
                <meeting>
                  <title>3rd Workshop on Gender Bias in Natural Language Processing</title>
                  <date type="start">2021-08</date>
                  <date type="end">2021-08</date>
                  <settlement>Online</settlement>
                  <country key="FR">France</country>
                </meeting>
                <imprint>
                  <publisher>Association for Computational Linguistics</publisher>
                  <biblScope unit="pp">86-92</biblScope>
                </imprint>
              </monogr>
              <idno type="doi">10.18653/v1/2021.gebnlp-1.10</idno>
            </biblStruct>
          </sourceDesc>
          <profileDesc>
            <langUsage>
              <language ident="en">English</language>
            </langUsage>
            <textClass>
              <classCode scheme="halDomain" n="shs">Humanities and Social Sciences</classCode>
              <classCode scheme="halDomain" n="shs.genre">Humanities and Social Sciences/Gender studies</classCode>
              <classCode scheme="halDomain" n="shs.langue">Humanities and Social Sciences/Linguistics</classCode>
              <classCode scheme="halDomain" n="info">Computer Science [cs]</classCode>
              <classCode scheme="halDomain" n="info.info-ai">Computer Science [cs]/Artificial Intelligence [cs.AI]</classCode>
              <classCode scheme="halTypology" n="COMM">Conference papers</classCode>
              <classCode scheme="halOldTypology" n="COMM">Conference papers</classCode>
              <classCode scheme="halTreeTypology" n="COMM">Conference papers</classCode>
            </textClass>
            <abstract xml:lang="en">
              <p>In this paper we question the impact of gender representation in training data on the performance of an end-to-end ASR system. We create an experiment based on the Librispeech corpus and build 3 different training corpora varying only the proportion of data produced by each gender category. We observe that if our system is overall robust to the gender balance or imbalance in training data, it is nonetheless dependent on the adequacy between the individuals present in the training and testing sets.</p>
            </abstract>
          </profileDesc>
        </biblFull>
      </listBibl>
    </body>
    <back>
      <listOrg type="structures">
        <org type="researchteam" xml:id="struct-1043313" status="VALID">
          <orgName>Groupe d’Étude en Traduction Automatique/Traitement Automatisé des Langues et de la Parole</orgName>
          <orgName type="acronym">GETALP</orgName>
          <date type="start">2020-01-01</date>
          <desc>
            <address>
              <addrLine>Laboratoire LIG - Bâtiment IMAG - 700 avenue Centrale, CS 40700 - 38058 Grenoble cedex 9</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://lig-getalp.imag.fr/</ref>
          </desc>
          <listRelation>
            <relation active="#struct-1043301" type="direct"/>
            <relation active="#struct-300009" type="indirect"/>
            <relation name="UMR5217" active="#struct-441569" type="indirect"/>
            <relation active="#struct-1042703" type="indirect"/>
            <relation active="#struct-1043329" type="indirect"/>
          </listRelation>
        </org>
        <org type="laboratory" xml:id="struct-1043301" status="VALID">
          <idno type="IdRef">148425550</idno>
          <idno type="ISNI">0000 0001 2286 4035</idno>
          <idno type="RNSR">200711886U</idno>
          <idno type="ROR">https://ror.org/01c8rcg82</idno>
          <orgName>Laboratoire d'Informatique de Grenoble</orgName>
          <orgName type="acronym">LIG</orgName>
          <date type="start">2020-01-01</date>
          <desc>
            <address>
              <addrLine>UMR 5217 - Laboratoire LIG - Bâtiment IMAG - 700 avenue Centrale - Domaine Universitaire de Saint-Martin-d’Hères - Adresse postale : CS 40700 - 38058 Grenoble cedex 9 - Tél. : 04 57 42 14 00</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.liglab.fr/</ref>
          </desc>
          <listRelation>
            <relation active="#struct-300009" type="direct"/>
            <relation name="UMR5217" active="#struct-441569" type="direct"/>
            <relation active="#struct-1042703" type="direct"/>
            <relation active="#struct-1043329" type="direct"/>
          </listRelation>
        </org>
        <org type="laboratory" xml:id="struct-1043147" status="VALID">
          <idno type="IdRef">029859301</idno>
          <idno type="ISNI">0000000123536957</idno>
          <idno type="RNSR">199113164C</idno>
          <idno type="ROR">https://ror.org/05588ks88</idno>
          <orgName>LInguistique et DIdactique des Langues Étrangères et Maternelles</orgName>
          <orgName type="acronym">LIDILEM</orgName>
          <date type="start">2020-01-01</date>
          <desc>
            <address>
              <addrLine>Bâtiment Stendhal - CS40700 - 38058 Grenoble cedex 9</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://lidilem.univ-grenoble-alpes.fr/</ref>
          </desc>
          <listRelation>
            <relation name="EA609" active="#struct-1042703" type="direct"/>
          </listRelation>
        </org>
        <org type="institution" xml:id="struct-300009" status="VALID">
          <idno type="ROR">https://ror.org/02kvxyf05</idno>
          <orgName>Institut National de Recherche en Informatique et en Automatique</orgName>
          <orgName type="acronym">Inria</orgName>
          <desc>
            <address>
              <addrLine>Domaine de Voluceau - Rocquencourt - BP 105 - 78153 Le Chesnay Cedex</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.inria.fr/en/</ref>
          </desc>
        </org>
        <org type="regroupinstitution" xml:id="struct-441569" status="VALID">
          <idno type="IdRef">02636817X</idno>
          <idno type="ISNI">0000000122597504</idno>
          <idno type="ROR">https://ror.org/02feahw73</idno>
          <orgName>Centre National de la Recherche Scientifique</orgName>
          <orgName type="acronym">CNRS</orgName>
          <date type="start">1939-10-19</date>
          <desc>
            <address>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.cnrs.fr/</ref>
          </desc>
        </org>
        <org type="regroupinstitution" xml:id="struct-1042703" status="VALID">
          <idno type="IdRef">240648315</idno>
          <idno type="ROR">https://ror.org/02rx3b187</idno>
          <orgName>Université Grenoble Alpes</orgName>
          <orgName type="acronym">UGA</orgName>
          <date type="start">2020-01-01</date>
          <desc>
            <address>
              <addrLine>Adresse CS 40700 - 38058 Grenoble cedex</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.univ-grenoble-alpes.fr</ref>
          </desc>
        </org>
        <org type="institution" xml:id="struct-1043329" status="VALID">
          <idno type="IdRef">026388804</idno>
          <idno type="ROR">https://ror.org/05sbt2524</idno>
          <orgName>Institut polytechnique de Grenoble - Grenoble Institute of Technology</orgName>
          <orgName type="acronym">Grenoble INP</orgName>
          <date type="start">2020-01-01</date>
          <desc>
            <address>
              <addrLine>46 avenue Félix Viallet 38031 Grenoble Cedex 1</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.grenoble-inp.fr/</ref>
          </desc>
          <listRelation>
            <relation active="#struct-1042703" type="direct"/>
          </listRelation>
        </org>
      </listOrg>
    </back>
  </text>
</TEI>