Make a Gene2Symbol object

makeGene2SymbolFromEnsembl(organism, genomeBuild = NULL,
  release = NULL, ignoreTxVersion = TRUE, format = c("makeUnique",
  "unmodified", "1:1"))

makeGene2SymbolFromEnsDb(object, format = c("makeUnique", "unmodified",
  "1:1"))

makeGene2SymbolFromGFF(file, format = c("makeUnique", "unmodified",
  "1:1"))

Arguments

organism

character(1). Full Latin organism name (e.g. "Homo sapiens").

genomeBuild

character(1). Ensembl genome build assembly name (e.g. "GRCh38"). If set to NULL, defaults to the most recent build available. Note: don't pass in UCSC build IDs (e.g. "hg38").

release

integer(1). Ensembl release version (e.g. 90). We recommend setting this value if possible, for improved reproducibility. When left unset, the latest release available via AnnotationHub/ensembldb is used. Note that the latest version available can vary, depending on the versions of AnnotationHub and ensembldb in use.

ignoreTxVersion

logical(1). Don't include the transcript version in the identifier. Only applies when level = "transcripts". This simplifies identifier matching when generating a tx2gene file.

format

character(1). Formatting method to apply:

  • "makeUnique": Recommended. Apply make.unique() to the geneName column. Gene symbols are made unique, while the gene IDs remain unmodified.

  • "unmodified": Return geneID and geneName columns unmodified, in long format.

  • "1:1": For gene symbols that map to multiple gene IDs, select only the first annotated gene ID.

object

Object.

file

character(1). File path.

Value

Gene2Symbol.

Note

Updated 2019-07-28.

GFF/GTF file

Remote URLs and compressed files are supported.

Examples

## makeGene2SymbolFromEnsembl ====
x <- makeGene2SymbolFromEnsembl(organism = "Homo sapiens")
#> Making GRanges from Ensembl.
#> Matching EnsDb from AnnotationHub 2.18.0 (2019-10-29).
#> AH75011: Ensembl 98 EnsDb for Homo sapiens
#> Run this code to download EnsDb manually:
#> > library(AnnotationHub)
#> > ah <- AnnotationHub()
#> > edb <- ah[["AH75011"]]
#> Making GRanges from EnsDb object.
#> - Organism: "Homo sapiens"
#> - Genome build: "GRCh38"
#> - Ensembl release: 98L
#> - Level: "genes"
#> Defining 'broadClass' using: geneBiotype, geneName, seqnames.
#> Arranging by 'geneID'.
#> 67946 genes detected.
#> 3132 non-unique gene symbols detected.
#> Gene2Symbol with 67946 rows and 2 columns
#>                          geneID    geneName
#>                     <character> <character>
#> ENSG00000000003 ENSG00000000003      TSPAN6
#> ENSG00000000005 ENSG00000000005        TNMD
#> ENSG00000000419 ENSG00000000419        DPM1
#> ENSG00000000457 ENSG00000000457       SCYL3
#> ENSG00000000460 ENSG00000000460    C1orf112
#> ...                         ...         ...
#> LRG_995                 LRG_995     FUBP1.1
#> LRG_996                 LRG_996     ERBB3.1
#> LRG_997                 LRG_997      ROS1.1
#> LRG_998                 LRG_998     CCND3.1
#> LRG_999                 LRG_999       CIC.1
## makeGene2SymbolFromEnsDb ====
x <- makeGene2SymbolFromEnsDb("EnsDb.Hsapiens.v75")
#> Making GRanges from EnsDb object.
#> Loading required namespace: EnsDb.Hsapiens.v75
#> - Organism: "Homo sapiens"
#> - Genome build: "GRCh37"
#> - Ensembl release: 75L
#> - Level: "genes"
#> Defining 'broadClass' using: geneBiotype, geneName, seqnames.
#> Arranging by 'geneID'.
#> 64102 genes detected.
#> 3075 non-unique gene symbols detected.
#> Gene2Symbol with 64102 rows and 2 columns
#>                          geneID    geneName
#>                     <character> <character>
#> ENSG00000000003 ENSG00000000003      TSPAN6
#> ENSG00000000005 ENSG00000000005        TNMD
#> ENSG00000000419 ENSG00000000419        DPM1
#> ENSG00000000457 ENSG00000000457       SCYL3
#> ENSG00000000460 ENSG00000000460    C1orf112
#> ...                         ...         ...
#> LRG_94                   LRG_94      LRG_94
#> LRG_96                   LRG_96      LRG_96
#> LRG_97                   LRG_97      LRG_97
#> LRG_98                   LRG_98      LRG_98
#> LRG_99                   LRG_99      LRG_99
## makeGene2SymbolFromGFF ====
## GTF
file <- file.path(basejumpTestsURL, "example.gtf")
x <- makeGene2SymbolFromGFF(file)
#> Making GRanges from GFF file.
#> Importing 'example.gtf' using 'rtracklayer::import()'.
#> Ensembl GTF detected.
#> Defining 'broadClass' using: geneBiotype, geneName, seqnames.
#> Arranging by 'geneID'.
#> 17 genes detected.
#> Gene2Symbol with 17 rows and 2 columns
#>                                geneID    geneName
#>                           <character> <character>
#> ENSMUSG00000025900 ENSMUSG00000025900         Rp1
#> ENSMUSG00000051951 ENSMUSG00000051951        Xkr4
#> ENSMUSG00000064842 ENSMUSG00000064842     Gm26206
#> ENSMUSG00000088333 ENSMUSG00000088333     Gm27396
#> ENSMUSG00000089699 ENSMUSG00000089699      Gm1992
#> ...                               ...         ...
#> ENSMUSG00000103147 ENSMUSG00000103147      Gm7341
#> ENSMUSG00000103161 ENSMUSG00000103161     Gm38148
#> ENSMUSG00000103201 ENSMUSG00000103201     Gm37329
#> ENSMUSG00000103377 ENSMUSG00000103377     Gm37180
#> ENSMUSG00000104017 ENSMUSG00000104017     Gm37363
## GFF3
file <- file.path(basejumpTestsURL, "example.gff3")
x <- makeGene2SymbolFromGFF(file)
#> Making GRanges from GFF file.
#> Importing 'example.gff3' using 'rtracklayer::import()'.
#> Ensembl GFF3 detected.
#> Defining 'broadClass' using: geneBiotype, geneName, seqnames.
#> Arranging by 'geneID'.
#> 20 genes detected.
#> Gene2Symbol with 20 rows and 2 columns
#>                                geneID    geneName
#>                           <character> <character>
#> ENSMUSG00000025900 ENSMUSG00000025900         Rp1
#> ENSMUSG00000025902 ENSMUSG00000025902       Sox17
#> ENSMUSG00000051951 ENSMUSG00000051951        Xkr4
#> ENSMUSG00000064842 ENSMUSG00000064842     Gm26206
#> ENSMUSG00000088333 ENSMUSG00000088333     Gm27396
#> ...                               ...         ...
#> ENSMUSG00000103161 ENSMUSG00000103161     Gm38148
#> ENSMUSG00000103201 ENSMUSG00000103201     Gm37329
#> ENSMUSG00000103377 ENSMUSG00000103377     Gm37180
#> ENSMUSG00000104017 ENSMUSG00000104017     Gm37363
#> ENSMUSG00000104123 ENSMUSG00000104123     Gm37483