Skip to contents

Reads WGS data, including the PCGR tiers.tsv, PURPLE cnv.gene.tsv, and sv.prioritised.tsv files. For each file, if its path has been specified in the RNAsum params and is valid, that file is read and its contents are returned. Otherwise, as a fallback, if the umccrise directory param has been specified, the function attempts to find a file matching the expected pattern in that directory.

Usage

read_wgs_data(p)

Arguments

p

RNAsum params list.

Value

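A named list of the input sample data, with elements pcgr_tiers_tsv, purple_gene_tsv and sv_tsv. An element may be NULL (as sv_tsv is in the example below).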

Examples

p <- list(
  umccrise = system.file("rawdata/test_data/umccrised/test_sample_WGS", package = "RNAsum"),
  pcgr_tiers_tsv = system.file(
    "rawdata/test_data/umccrised/test_sample_WGS/small_variants",
    "TEST-somatic.pcgr.snvs_indels.tiers.tsv",
    package = "RNAsum"
  ),
  sash_tsv = system.file(
    "rawdata/test_data/test_sample_WGS/structural/TEST.sv.prioritised.tsv",
    package = "RNAsum"
  )
)
(res <- read_wgs_data(p))
#> $pcgr_tiers_tsv
#> # A tibble: 99 × 61
#>    CHROM POS       REF   ALT   GENOMIC_CHANGE    GENOME_VERSION VCF_SAMPLE_ID   
#>    <chr> <chr>     <chr> <chr> <chr>             <chr>          <chr>           
#>  1 3     179218303 G     A     3:g.179218303G>A  grch38         SBJ02999__PRJ22…
#>  2 17    7674894   G     A     17:g.7674894G>A   grch38         SBJ02999__PRJ22…
#>  3 13    110715614 C     G     13:g.110715614C>G grch38         SBJ02999__PRJ22…
#>  4 13    110719444 G     C     13:g.110719444G>C grch38         SBJ02999__PRJ22…
#>  5 8     20250093  C     T     8:g.20250093C>T   grch38         SBJ02999__PRJ22…
#>  6 8     13086457  C     T     8:g.13086457C>T   grch38         SBJ02999__PRJ22…
#>  7 10    31521795  G     C     10:g.31521795G>C  grch38         SBJ02999__PRJ22…
#>  8 9     132927250 G     A     9:g.132927250G>A  grch38         SBJ02999__PRJ22…
#>  9 3     47121452  G     A     3:g.47121452G>A   grch38         SBJ02999__PRJ22…
#> 10 6     117365075 G     C     6:g.117365075G>C  grch38         SBJ02999__PRJ22…
#> # ℹ 89 more rows
#> # ℹ 54 more variables: VARIANT_CLASS <chr>, SYMBOL <chr>, GENE_NAME <chr>,
#> #   CCDS <chr>, CANONICAL <chr>, ENTREZ_ID <chr>, UNIPROT_ID <chr>,
#> #   ENSEMBL_TRANSCRIPT_ID <chr>, ENSEMBL_GENE_ID <chr>, REFSEQ_MRNA <chr>,
#> #   ONCOSCORE <chr>, ONCOGENE <chr>, TUMOR_SUPPRESSOR <chr>,
#> #   ONCOGENE_EVIDENCE <chr>, TUMOR_SUPPRESSOR_EVIDENCE <chr>,
#> #   DISGENET_CUI <chr>, DISGENET_TERMS <chr>, CONSEQUENCE <chr>, …
#> 
#> $purple_gene_tsv
#> # A tibble: 25,417 × 20
#>    chromosome  start    end gene        minCopyNumber maxCopyNumber unused
#>    <chr>       <dbl>  <dbl> <chr>               <dbl>         <dbl> <chr> 
#>  1 chr1        11869  14409 DDX11L1              1.98          1.98 0     
#>  2 chr1        14404  29570 WASH7P               1.98          1.98 0     
#>  3 chr1        17369  17436 MIR6859-1            1.98          1.98 0     
#>  4 chr1        29554  31097 MIR1302-2HG          1.98          1.98 0     
#>  5 chr1        30366  30503 MIR1302-2            1.98          1.98 0     
#>  6 chr1        34554  36081 FAM138A              1.98          1.98 0     
#>  7 chr1        69091  70008 OR4F5                1.98          1.98 0     
#>  8 chr1       185217 195411 FO538757.1           1.98          1.98 0     
#>  9 chr1       187891 187958 MIR6859-2            1.98          1.98 0     
#> 10 chr1       450740 451678 OR4F29               1.98          1.98 0     
#> # ℹ 25,407 more rows
#> # ℹ 13 more variables: somaticRegions <dbl>, germlineHomDeletionRegions <dbl>,
#> #   germlineHetToHomDeletionRegions <dbl>, transcriptId <chr>,
#> #   transcriptVersion <chr>, chromosomeBand <chr>, minRegions <dbl>,
#> #   minRegionStart <dbl>, minRegionEnd <dbl>, minRegionStartSupport <chr>,
#> #   minRegionEndSupport <chr>, minRegionMethod <chr>,
#> #   minMinorAlleleCopyNumber <dbl>
#> 
#> $sv_tsv
#> NULL
#>