Reads WGS data, including PCGR tiers.tsv, PURPLE cnv.gene.tsv, and
sv.prioritised.tsv. If a file path has been specified in the RNAsum params
and is valid, it is returned. As a fallback, if the umccrise directory param
has been specified, an attempt is made to detect the file by its pattern
within that directory.
Examples
p <- list(
umccrise = system.file("rawdata/test_data/umccrised/test_sample_WGS", package = "RNAsum"),
pcgr_tiers_tsv = system.file(
"rawdata/test_data/umccrised/test_sample_WGS/small_variants",
"TEST-somatic.pcgr.snvs_indels.tiers.tsv",
package = "RNAsum"
),
sash_tsv = system.file(
"rawdata/test_data/test_sample_WGS/structural/TEST.sv.prioritised.tsv",
package = "RNAsum"
)
)
(res <- read_wgs_data(p))
#> $pcgr_tiers_tsv
#> # A tibble: 99 × 61
#> CHROM POS REF ALT GENOMIC_CHANGE GENOME_VERSION VCF_SAMPLE_ID
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 3 179218303 G A 3:g.179218303G>A grch38 SBJ02999__PRJ22…
#> 2 17 7674894 G A 17:g.7674894G>A grch38 SBJ02999__PRJ22…
#> 3 13 110715614 C G 13:g.110715614C>G grch38 SBJ02999__PRJ22…
#> 4 13 110719444 G C 13:g.110719444G>C grch38 SBJ02999__PRJ22…
#> 5 8 20250093 C T 8:g.20250093C>T grch38 SBJ02999__PRJ22…
#> 6 8 13086457 C T 8:g.13086457C>T grch38 SBJ02999__PRJ22…
#> 7 10 31521795 G C 10:g.31521795G>C grch38 SBJ02999__PRJ22…
#> 8 9 132927250 G A 9:g.132927250G>A grch38 SBJ02999__PRJ22…
#> 9 3 47121452 G A 3:g.47121452G>A grch38 SBJ02999__PRJ22…
#> 10 6 117365075 G C 6:g.117365075G>C grch38 SBJ02999__PRJ22…
#> # ℹ 89 more rows
#> # ℹ 54 more variables: VARIANT_CLASS <chr>, SYMBOL <chr>, GENE_NAME <chr>,
#> # CCDS <chr>, CANONICAL <chr>, ENTREZ_ID <chr>, UNIPROT_ID <chr>,
#> # ENSEMBL_TRANSCRIPT_ID <chr>, ENSEMBL_GENE_ID <chr>, REFSEQ_MRNA <chr>,
#> # ONCOSCORE <chr>, ONCOGENE <chr>, TUMOR_SUPPRESSOR <chr>,
#> # ONCOGENE_EVIDENCE <chr>, TUMOR_SUPPRESSOR_EVIDENCE <chr>,
#> # DISGENET_CUI <chr>, DISGENET_TERMS <chr>, CONSEQUENCE <chr>, …
#>
#> $purple_gene_tsv
#> # A tibble: 25,417 × 20
#> chromosome start end gene minCopyNumber maxCopyNumber unused
#> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr>
#> 1 chr1 11869 14409 DDX11L1 1.98 1.98 0
#> 2 chr1 14404 29570 WASH7P 1.98 1.98 0
#> 3 chr1 17369 17436 MIR6859-1 1.98 1.98 0
#> 4 chr1 29554 31097 MIR1302-2HG 1.98 1.98 0
#> 5 chr1 30366 30503 MIR1302-2 1.98 1.98 0
#> 6 chr1 34554 36081 FAM138A 1.98 1.98 0
#> 7 chr1 69091 70008 OR4F5 1.98 1.98 0
#> 8 chr1 185217 195411 FO538757.1 1.98 1.98 0
#> 9 chr1 187891 187958 MIR6859-2 1.98 1.98 0
#> 10 chr1 450740 451678 OR4F29 1.98 1.98 0
#> # ℹ 25,407 more rows
#> # ℹ 13 more variables: somaticRegions <dbl>, germlineHomDeletionRegions <dbl>,
#> # germlineHetToHomDeletionRegions <dbl>, transcriptId <chr>,
#> # transcriptVersion <chr>, chromosomeBand <chr>, minRegions <dbl>,
#> # minRegionStart <dbl>, minRegionEnd <dbl>, minRegionStartSupport <chr>,
#> # minRegionEndSupport <chr>, minRegionMethod <chr>,
#> # minMinorAlleleCopyNumber <dbl>
#>
#> $sv_tsv
#> NULL
#>