<?xml version="1.0" encoding="utf-8"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:hal="http://hal.archives-ouvertes.fr/" xmlns:gml="http://www.opengis.net/gml/3.3/" xmlns:gmlce="http://www.opengis.net/gml/3.3/ce" version="1.1" xsi:schemaLocation="http://www.tei-c.org/ns/1.0 http://api.archives-ouvertes.fr/documents/aofr-sword.xsd">
  <teiHeader>
    <fileDesc>
      <titleStmt>
        <title>HAL TEI export of hal-01858710</title>
      </titleStmt>
      <publicationStmt>
        <distributor>CCSD</distributor>
        <availability status="restricted">
          <licence target="https://creativecommons.org/publicdomain/zero/1.0/">CC0 1.0 - Universal</licence>
        </availability>
        <date when="2026-05-19T17:41:04+02:00"/>
      </publicationStmt>
      <sourceDesc>
        <p part="N">HAL API Platform</p>
      </sourceDesc>
    </fileDesc>
  </teiHeader>
  <text>
    <body>
      <listBibl>
        <biblFull>
          <titleStmt>
            <title xml:lang="en">Efficient analysis of large-scale genome-wide data with two R packages: bigstatsr and bigsnpr</title>
            <author role="aut">
              <persName>
                <forename type="first">Florian</forename>
                <surname>Privé</surname>
              </persName>
              <idno type="halauthorid">1411595-0</idno>
              <affiliation ref="#struct-1042063"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Hugues</forename>
                <surname>Aschard</surname>
              </persName>
              <email type="md5">5b9d2ebd1c65caaaa96e5f1037570f7b</email>
              <email type="domain">inserm.fr</email>
              <idno type="idhal" notation="numeric">849150</idno>
              <idno type="halauthorid" notation="string">313712-849150</idno>
              <affiliation ref="#struct-463018"/>
              <affiliation ref="#struct-530648"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Andrey</forename>
                <surname>Ziyatdinov</surname>
              </persName>
              <idno type="halauthorid">1411596-0</idno>
              <affiliation ref="#struct-530648"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Michael</forename>
                <surname>Blum</surname>
              </persName>
              <email type="md5">bbbef0ead346176a1b1fadd952a3bd0f</email>
              <email type="domain">imag.fr</email>
              <idno type="idhal" notation="numeric">854172</idno>
              <idno type="halauthorid" notation="string">356489-854172</idno>
              <affiliation ref="#struct-1042063"/>
            </author>
            <editor role="depositor">
              <persName>
                <forename>Michael Gb</forename>
                <surname>Blum</surname>
              </persName>
              <email type="md5">bbbef0ead346176a1b1fadd952a3bd0f</email>
              <email type="domain">imag.fr</email>
            </editor>
            <funder ref="#projanr-38053"/>
            <funder ref="#projanr-42467"/>
            <funder>Authors acknowledge LabEx PERSYVAL-Lab (ANR-11-LABX-0025-01). Authors also acknowledge the Grenoble Alpes Data Institute that is supported by the French National Research Agency under the ‘Investissements d’avenir’ program (ANR-15-IDEX-02).</funder>
          </titleStmt>
          <editionStmt>
            <edition n="v1" type="current">
              <date type="whenSubmitted">2018-08-21 11:56:19</date>
              <date type="whenModified">2025-09-27 19:59:09</date>
              <date type="whenReleased">2018-08-21 11:56:19</date>
              <date type="whenProduced">2018-08-15</date>
              <ref type="externalLink" target="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6084588"/>
            </edition>
            <respStmt>
              <resp>contributor</resp>
              <name key="139843">
                <persName>
                  <forename>Michael Gb</forename>
                  <surname>Blum</surname>
                </persName>
                <email type="md5">bbbef0ead346176a1b1fadd952a3bd0f</email>
                <email type="domain">imag.fr</email>
              </name>
            </respStmt>
          </editionStmt>
          <publicationStmt>
            <distributor>CCSD</distributor>
            <idno type="halId">hal-01858710</idno>
            <idno type="halUri">https://hal.univ-grenoble-alpes.fr/hal-01858710</idno>
            <idno type="halBibtex">prive:hal-01858710</idno>
            <idno type="halRefHtml">&lt;i&gt;Bioinformatics&lt;/i&gt;, 2018, 34 (16), pp.2781 - 2787. &lt;a target="_blank" href="https://dx.doi.org/10.1093/bioinformatics/bty185"&gt;&amp;#x27E8;10.1093/bioinformatics/bty185&amp;#x27E9;&lt;/a&gt;</idno>
            <idno type="halRef">Bioinformatics, 2018, 34 (16), pp.2781 - 2787. ⟨10.1093/bioinformatics/bty185⟩</idno>
            <availability status="restricted"/>
          </publicationStmt>
          <seriesStmt>
            <idno type="stamp" n="PASTEUR">Institut Pasteur</idno>
            <idno type="stamp" n="UGA">HAL Grenoble Alpes</idno>
            <idno type="stamp" n="IMAG">IMAG</idno>
            <idno type="stamp" n="CNRS">CNRS - Centre national de la recherche scientifique</idno>
            <idno type="stamp" n="INPG">Institut polytechnique de Grenoble</idno>
            <idno type="stamp" n="RIIP_PARIS">Institut Pasteur de Paris</idno>
            <idno type="stamp" n="TIMC-IMAG">TIMC</idno>
            <idno type="stamp" n="TIMC-IMAG-BCM" corresp="TIMC-IMAG">BCM : Biologie Computationnelle et Mathématique</idno>
            <idno type="stamp" n="PERSYVAL-LAB">[Labex] PERSYVAL-lab</idno>
            <idno type="stamp" n="UNIV-LYON">Université de Lyon</idno>
            <idno type="stamp" n="UGA-COMUE">Université Grenoble Alpes [2016-2019]</idno>
            <idno type="stamp" n="ANR">ANR</idno>
            <idno type="stamp" n="TEST-UGA">TEST-UGA</idno>
          </seriesStmt>
          <notesStmt>
            <note type="audience" n="2">International</note>
            <note type="popular" n="0">No</note>
            <note type="peer" n="1">Yes</note>
          </notesStmt>
          <sourceDesc>
            <biblStruct>
              <analytic>
                <title xml:lang="en">Efficient analysis of large-scale genome-wide data with two R packages: bigstatsr and bigsnpr</title>
                <author role="aut">
                  <persName>
                    <forename type="first">Florian</forename>
                    <surname>Privé</surname>
                  </persName>
                  <idno type="halauthorid">1411595-0</idno>
                  <affiliation ref="#struct-1042063"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Hugues</forename>
                    <surname>Aschard</surname>
                  </persName>
                  <email type="md5">5b9d2ebd1c65caaaa96e5f1037570f7b</email>
                  <email type="domain">inserm.fr</email>
                  <idno type="idhal" notation="numeric">849150</idno>
                  <idno type="halauthorid" notation="string">313712-849150</idno>
                  <affiliation ref="#struct-463018"/>
                  <affiliation ref="#struct-530648"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Andrey</forename>
                    <surname>Ziyatdinov</surname>
                  </persName>
                  <idno type="halauthorid">1411596-0</idno>
                  <affiliation ref="#struct-530648"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Michael</forename>
                    <surname>Blum</surname>
                  </persName>
                  <email type="md5">bbbef0ead346176a1b1fadd952a3bd0f</email>
                  <email type="domain">imag.fr</email>
                  <idno type="idhal" notation="numeric">854172</idno>
                  <idno type="halauthorid" notation="string">356489-854172</idno>
                  <affiliation ref="#struct-1042063"/>
                </author>
              </analytic>
              <monogr>
                <idno type="halJournalId" status="VALID">3436</idno>
                <idno type="issn">1367-4803</idno>
                <idno type="eissn">1367-4811</idno>
                <title level="j">Bioinformatics</title>
                <imprint>
                  <publisher>Oxford University Press (OUP)</publisher>
                  <biblScope unit="volume">34</biblScope>
                  <biblScope unit="issue">16</biblScope>
                  <biblScope unit="pp">2781 - 2787</biblScope>
                  <date type="datePub">2018-08-15</date>
                </imprint>
              </monogr>
              <idno type="doi">10.1093/bioinformatics/bty185</idno>
              <idno type="pubmed">29617937</idno>
              <idno type="pubmedcentral">PMC6084588</idno>
            </biblStruct>
          </sourceDesc>
          <profileDesc>
            <langUsage>
              <language ident="en">English</language>
            </langUsage>
            <textClass>
              <classCode scheme="halDomain" n="sdv.gen.gpo">Life Sciences [q-bio]/Genetics/Populations and Evolution [q-bio.PE]</classCode>
              <classCode scheme="halTypology" n="ART">Journal articles</classCode>
              <classCode scheme="halOldTypology" n="ART">Journal articles</classCode>
              <classCode scheme="halTreeTypology" n="ART">Journal articles</classCode>
            </textClass>
            <abstract xml:lang="en">
              <p>Motivation: Genome-wide datasets produced for association studies have dramatically increased in size over the past few years, with modern datasets commonly including millions of variants measured in dozens of thousands of individuals. This increase in data size is a major challenge severely slowing down genomic analyses, leading to some software becoming obsolete and researchers having limited access to diverse analysis tools. Results: Here we present two R packages, bigstatsr and bigsnpr, allowing for the analysis of large scale genomic data to be performed within R. To address large data size, the packages use memory-mapping for accessing data matrices stored on disk instead of in RAM. To perform data pre-processing and data analysis, the packages integrate most of the tools that are commonly used, either through transparent system calls to existing software, or through updated or improved implementation of existing methods. In particular, the packages implement fast and accurate computations of principal component analysis and association studies, functions to remove single nucleotide polymorphisms in linkage disequilibrium and algorithms to learn polygenic risk scores on millions of single nucleotide polymorphisms. We illustrate applications of the two R packages by analyzing a case–control genomic dataset for celiac disease, performing an association study and computing polygenic risk scores. Finally, we demonstrate the scalability of the R packages by analyzing a simulated genome-wide dataset including 500 000 individuals and 1 million markers on a single desktop computer. Availability and implementation: https://privefl.github.io/bigstatsr/ and https://privefl.github.io/bigsnpr/.</p>
            </abstract>
          </profileDesc>
        </biblFull>
      </listBibl>
    </body>
    <back>
      <listOrg type="structures">
        <org type="researchteam" xml:id="struct-1042063" status="OLD">
          <orgName>Biologie Computationnelle et Mathématique</orgName>
          <orgName type="acronym">TIMC-IMAG-BCM</orgName>
          <date type="start">2016-01-01</date>
          <date type="end">2019-12-31</date>
          <desc>
            <address>
              <addrLine>Domaine de la Merci, 38706 La Tronche, France</addrLine>
              <country key="FR"/>
            </address>
          </desc>
          <listRelation>
            <relation active="#struct-1042061" type="direct"/>
            <relation active="#struct-89889" type="indirect"/>
            <relation active="#struct-301767" type="indirect"/>
            <relation name="UMR5525" active="#struct-441569" type="indirect"/>
            <relation active="#struct-445543" type="indirect"/>
          </listRelation>
        </org>
        <org type="laboratory" xml:id="struct-463018" status="OLD">
          <orgName>Centre de Bioinformatique, Biostatistique et Biologie Intégrative</orgName>
          <orgName type="acronym">C3BI</orgName>
          <date type="start">2015-03-01</date>
          <date type="end">2019-04-30</date>
          <desc>
            <address>
              <addrLine>25-28 rue du docteur Roux, 75724 Paris cedex 15</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://c3bi.pasteur.fr/</ref>
          </desc>
          <listRelation>
            <relation active="#struct-300027" type="direct"/>
            <relation name="USR3756" active="#struct-441569" type="direct"/>
          </listRelation>
        </org>
        <org type="regrouplaboratory" xml:id="struct-530648" status="VALID">
          <orgName>School of Public Health [Boston]</orgName>
          <desc>
            <address>
              <addrLine>Talbot Building, C202, 715 Albany Street, Boston, MA 02118</addrLine>
              <country key="US"/>
            </address>
            <ref type="url">http://www.bu.edu/sph/</ref>
          </desc>
          <listRelation>
            <relation active="#struct-63395" type="direct"/>
          </listRelation>
        </org>
        <org type="laboratory" xml:id="struct-1042061" status="OLD">
          <idno type="IdRef">14728936X</idno>
          <idno type="ISNI">0000 0004 4687 1979</idno>
          <idno type="RNSR">199511969L</idno>
          <idno type="ROR">https://ror.org/03985kf35</idno>
          <orgName>Techniques de l'Ingénierie Médicale et de la Complexité - Informatique, Mathématiques et Applications, Grenoble - UMR 5525</orgName>
          <orgName type="acronym">TIMC-IMAG</orgName>
          <date type="start">2016-01-01</date>
          <date type="end">2019-12-31</date>
          <desc>
            <address>
              <addrLine>Domaine de la Merci, 38706 La Tronche, France</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://www-timc.imag.fr</ref>
          </desc>
          <listRelation>
            <relation active="#struct-89889" type="direct"/>
            <relation active="#struct-301767" type="direct"/>
            <relation name="UMR5525" active="#struct-441569" type="direct"/>
            <relation active="#struct-445543" type="direct"/>
          </listRelation>
        </org>
        <org type="institution" xml:id="struct-89889" status="OLD">
          <idno type="IdRef">026388804</idno>
          <idno type="ROR">https://ror.org/05sbt2524</idno>
          <orgName>Institut polytechnique de Grenoble - Grenoble Institute of Technology</orgName>
          <orgName type="acronym">Grenoble INP</orgName>
          <date type="start">2007-01-01</date>
          <date type="end">2019-12-31</date>
          <desc>
            <address>
              <addrLine>46 avenue Félix Viallet 38031 Grenoble Cedex 1</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.grenoble-inp.fr/</ref>
          </desc>
        </org>
        <org type="institution" xml:id="struct-301767" status="VALID">
          <idno type="ROR">https://ror.org/01c7wz417</idno>
          <orgName>VetAgro Sup - Institut national d'enseignement supérieur et de recherche en alimentation, santé animale, sciences agronomiques et de l'environnement</orgName>
          <orgName type="acronym">VAS</orgName>
          <date type="start">2010-01-01</date>
          <desc>
            <address>
              <addrLine>Université de Lyon, VetAgro Sup, 69280 Marcy l'Etoile (campus vétérinaire); Université de Clermont, VetAgro Sup, 63370 Lempdes (campus agronomique)</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.vetagro-sup.fr/</ref>
          </desc>
        </org>
        <org type="regroupinstitution" xml:id="struct-441569" status="VALID">
          <idno type="IdRef">02636817X</idno>
          <idno type="ISNI">0000000122597504</idno>
          <idno type="ROR">https://ror.org/02feahw73</idno>
          <orgName>Centre National de la Recherche Scientifique</orgName>
          <orgName type="acronym">CNRS</orgName>
          <date type="start">1939-10-19</date>
          <desc>
            <address>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.cnrs.fr/</ref>
          </desc>
        </org>
        <org type="institution" xml:id="struct-445543" status="OLD">
          <idno type="IdRef">188399275</idno>
          <idno type="ROR">https://ror.org/02rx3b187</idno>
          <orgName>Université Grenoble Alpes [2016-2019]</orgName>
          <orgName type="acronym">UGA [2016-2019]</orgName>
          <date type="start">2016-01-01</date>
          <date type="end">2019-12-31</date>
          <desc>
            <address>
              <addrLine>38058 Grenoble cedex</addrLine>
              <country key="FR"/>
            </address>
          </desc>
        </org>
        <org type="institution" xml:id="struct-300027" status="VALID">
          <idno type="IdRef">027936643</idno>
          <idno type="ISNI">0000 0001 2353 6535</idno>
          <idno type="ROR">https://ror.org/0495fxg12</idno>
          <orgName>Institut Pasteur [Paris]</orgName>
          <orgName type="acronym">IP</orgName>
          <date type="start">1887-06-04</date>
          <desc>
            <address>
              <addrLine>25-28, rue du docteur Roux, 75724 Paris cedex 15</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.pasteur.fr</ref>
          </desc>
        </org>
        <org type="institution" xml:id="struct-63395" status="VALID">
          <idno type="IdRef">026472104</idno>
          <idno type="ROR">https://ror.org/05qwgg493</idno>
          <orgName>Boston University [Boston]</orgName>
          <orgName type="acronym">BU</orgName>
          <desc>
            <address>
              <addrLine>One Silber Way, Boston, MA 02215</addrLine>
              <country key="US"/>
            </address>
            <ref type="url">http://www.bu.edu/</ref>
          </desc>
        </org>
      </listOrg>
      <listOrg type="projects">
        <org type="anrProject" xml:id="projanr-38053" status="VALID">
          <idno type="anr">ANR-11-LABX-0025</idno>
          <idno type="program">Laboratoires d'excellence</idno>
          <orgName>PERSYVAL-lab</orgName>
          <desc>Systèmes et Algorithmes Pervasifs au confluent des mondes physique et numérique</desc>
          <date type="start">2011</date>
        </org>
        <org type="anrProject" xml:id="projanr-42467" status="VALID">
          <idno type="anr">ANR-15-IDEX-0002</idno>
          <orgName>UGA</orgName>
          <desc>IDEX UGA</desc>
          <date type="start">2015</date>
        </org>
      </listOrg>
    </back>
  </text>
</TEI>