Tissue-partitioned BMI instrument procedure

Author

Nick Sunderland

Published

January 10, 2025

Motivation

What does the weighting by PPH4 do to tissue partitioned BMI instruments? Let’s simulate some tissue partitioned BMI MR results.

library(data.table)
library(ggplot2)
library(ggpubr)
library(ggrepel)
library(genepi.utils)
library(tibble)
library(knitr)

BMI data

First load the BMI GWAS.

bmi    <- readRDS(file.path(TPMR_DIR, "output", "tmp_objects", "obj_bmi.RDS"))
clumps <- clump(as.data.table(bmi), p1=5e-8, r2=0.001, kb=10000, plink2=PLINK2, plink_ref=UKBB_REFERENCE)[index==TRUE]
bmi    <- subset_gwas(bmi, clumps$rsid)
kable(head(clumps, 5), caption = "BMI lead variants")

BMI lead variants
rsid	chr	bp	ea	oa	eaf	beta	se	n	ncase	trait	id	strand	imputed	info	q	q_p	i2	proxy_rsid	proxy_chr	proxy_bp	proxy_ea	proxy_oa	proxy_eaf	proxy_r2	snp	index	clump
rs10007906	4	31023610	A	C	0.360300	0.0133000	0.0018000	686479	NA	bmi	combined_bmi_giant_ukb_annot.txt	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	rs10007906	TRUE	291
rs1000940	17	5283252	A	G	0.701300	-0.0154000	0.0018000	794558	NA	bmi	combined_bmi_giant_ukb_annot.txt	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	rs1000940	TRUE	142
rs1007934	14	73463479	A	G	0.415000	-0.0120000	0.0017000	692411	NA	bmi	combined_bmi_giant_ukb_annot.txt	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	rs1007934	TRUE	407
rs10125137	9	118644190	T	C	0.641786	-0.0117273	0.0020523	463005	NA	bmi	combined_bmi_giant_ukb_annot.txt	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	rs10125137	TRUE	811
rs10131890	14	97258752	A	C	0.950300	-0.0222000	0.0039000	691777	NA	bmi	combined_bmi_giant_ukb_annot.txt	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	rs10131890	TRUE	846

Heart failure data

Load the heart failure GWAS.

hf <- readRDS(file.path(TPMR_DIR, "output", "tmp_objects", "obj_hf_allcause.RDS"))
hf <- subset_gwas(hf, clumps$rsid)

Harmonise the datasets

h <- MR(bmi, hf)
kable(head(as.data.table(h),5), caption = "Harmonised BMI:HF dataset")

Harmonised BMI:HF dataset
rsid	chr	bp	ea	oa	bx	bxse	by	byse	py	proxy_e_snp	proxy_o_snp	index_snp	group	ld_info	exposure	outcome
rs10007906_C_A	4	31023610	A	C	0.0133000	0.0018000	0.0039	0.0051	0.44980	NA	NA	TRUE	NA	FALSE	bmi	hf_allcause
rs1000940_G_A	17	5283252	A	G	-0.0154000	0.0018000	0.0011	0.0050	0.82970	NA	NA	TRUE	NA	FALSE	bmi	hf_allcause
rs1007934_G_A	14	73463479	A	G	-0.0120000	0.0017000	-0.0092	0.0049	0.05931	NA	NA	TRUE	NA	FALSE	bmi	hf_allcause
rs10125137_C_T	9	118644190	T	C	-0.0117273	0.0020523	0.0033	0.0049	0.50820	NA	NA	TRUE	NA	FALSE	bmi	hf_allcause
rs10131890_C_A	14	97258752	A	C	-0.0222000	0.0039000	-0.0211	0.0099	0.03310	NA	NA	TRUE	NA	FALSE	bmi	hf_allcause

Run the MR

This is just the plain MR of BMI on HF.

res <- run_mr(h)
kable(res, caption = "BMI:HF Mendelian randomisation")

BMI:HF Mendelian randomisation
method	correlation	exposure	outcome	n_snp	b	b_se	intercept	int_se	int_p	qstat	qstat_p	fstat	condfstat	overdispersion	n_pc	n_hunted	slopehunter_pi	slopehunter_ent
mr_ivw	FALSE	bmi	hf_allcause	874	0.4872820	0.0165599	0.0000000	NA	NA	1592.460	0	60.70091	NA	NA	NA	NA	NA	NA
mr_egger	FALSE	bmi	hf_allcause	874	0.5367727	0.0464790	-0.0008232	0.0007224	0.2544728	1590.092	0	NA	NA	NA	NA	NA	NA	NA
mr_weighted_median	FALSE	bmi	hf_allcause	874	0.5078217	0.0236696	0.0000000	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA
mr_weighted_mode	FALSE	bmi	hf_allcause	874	0.5456300	0.0523114	0.0000000	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA

Plot MR

Plot MR result of BMI on HF.

plot_mr(h, res)

Select variants - MR with a subset of BMI variants

In the Leyden et al. 2022 paper 86 variant colocalise with adipose tissue eQTLs and 140 colocalise with brain tissue eQTLs. There is overlap between the two tissues of 43 variants. Let’s run the run of BMI on HF just using these variants - just looks like a similar version of the previous plot but with fewer points.

set.seed(123)
tissue1_varid <- sample(h@snps, 140)
overlap_varid <- sample(tissue1_varid, 43)
tissue2_varid <- c(overlap_varid, sample(h@snps[!h@snps %in% overlap_varid], 86-43))

# turn of variants not in either tissue
h@index_snp[!h@snps %in% c(tissue1_varid, tissue2_varid)] <- FALSE
h@exposure <- "BMI tissue 1/2 SNPs"

res1 <- run_mr(h)
kable(res1, caption = "BMI:HF Mendelian randomisation (Tissue 1 & 2 SNPs only)")

BMI:HF Mendelian randomisation (Tissue 1 & 2 SNPs only)
method	correlation	exposure	outcome	n_snp	b	b_se	p	intercept	int_se	int_p	qstat	qstat_p	fstat	condfstat	overdispersion	n_pc	n_hunted	slopehunter_pi	slopehunter_ent
mr_ivw	FALSE	BMI tissue 1/2 SNPs	hf_allcause	177	0.5108471	0.0363407	0e+00	0.0000000	NA	NA	304.2402	0	61.28134	NA	NA	NA	NA	NA	NA
mr_egger	FALSE	BMI tissue 1/2 SNPs	hf_allcause	177	0.5475313	0.1037260	1e-07	-0.0006034	0.0015974	0.7056336	303.9924	0	NA	NA	NA	NA	NA	NA	NA
mr_weighted_median	FALSE	BMI tissue 1/2 SNPs	hf_allcause	177	0.5730810	0.0494983	0e+00	0.0000000	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA
mr_weighted_mode	FALSE	BMI tissue 1/2 SNPs	hf_allcause	177	0.5887868	0.1022882	0e+00	0.0000000	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA	NA

plot_mr(h, res1)

Run multivariable MR (unweighted)

Now lets turn this into a multivariable MR situation, with BMI variants for tissues one and BMI variants for tissue 2, against HF. The results are the same, as expected, because there is no differential weighting of the betas, effectively they are just the same instrument. So far nothing new.

# copy of BMI for tissue 1
bmi_t1 <- bmi
bmi_t1@id <- "bmi tissue 1"
bmi_t1@trait <- "bmi tissue 1"

# copy of BMI for tissue 2
bmi_t2 <- bmi
bmi_t2@id <- "bmi tissue 2"
bmi_t2@trait <- "bmi tissue 2"

# harmonise
h_mvmr <- MR(list(bmi_t1, bmi_t2), hf)

# turn of variants not in either tissue
h_mvmr@index_snp[!h_mvmr@snps %in% c(tissue1_varid, tissue2_varid)] <- FALSE

res2 <- run_mr(h_mvmr, methods="mr_ivw")
kable(res2, caption = "BMI:HF MV Mendelian randomisation")

BMI:HF MV Mendelian randomisation
method	correlation	exposure	outcome	n_snp	b	b_se	p	intercept	int_se	int_p	qstat	qstat_p	fstat	condfstat	overdispersion	n_pc	n_hunted	slopehunter_pi	slopehunter_ent
mr_mvivw	FALSE	bmi tissue 1	hf_allcause	177	0.5108471	0.0363407	0	0	NA	NA	302.5116	0	NA	0	NA	NA	NA	NA	NA
mr_mvivw	FALSE	bmi tissue 2	hf_allcause	177	0.5108471	0.0363407	0	0	NA	NA	302.5116	0	NA	0	NA	NA	NA	NA	NA

plot_mr(h_mvmr, res2)

eQTL colocalising BMI variants

To separate the BMI instruments for different tissues we need to ‘change’ them somehow, in a biologically informed way. Hence weighting the betas by the best colocalisation results at each locus.

Number of colocalising variants per tissue

Let’s first look at how many BMI variants colocalise with eQTLs from the GTEx version 10 tissues. There is quite a spread.

coloc_thresh <- 0.8

coloc_results <- fread(file.path(TPMR_DIR, "output", "tables", "coloc_results.tsv.gz"))
coloc_counts <- (coloc_results
  [locus_trait=="bmi" & grepl("^bmi", trait.x) & !grepl("^bmi|^hf_", trait.y) & grepl("protein_coding", gene_type.y)]
  [, num_coloc_genes := sum(pph4>coloc_thresh), by=trait.y]
  [, .SD[which.max(pph4)], by=.(locus_index_rsid, trait.y)]
  [, .(num_coloc_genes= num_coloc_genes[1],
       num_coloc_pph4 = sum(pph4>coloc_thresh),
       mean_pph4      = mean(pph4),
       mean_sample_n  = mean(sample_n.y)), by=c(tissue="trait.y")]
  [, tissue := factor(tissue, levels = tissue[order(-`num_coloc_pph4`)])]
)

ggplot(coloc_counts, aes(y = tissue, x = `num_coloc_pph4`)) +
  theme_classic() +
  geom_col(fill= "blue", alpha = 0.3) +
  labs(subtitle = paste("coloc thresh:", coloc_thresh), 
       x = "Number of colocalising BMI variants") +
  theme(axis.title.y = element_blank())

Determinants of colocalisation number

Does the number of colocalising variants, and/or the mean PPH4 value (across all BMI variants) for each tissue, depend on eQTL sample size? Yes, this appears to be important.

ggplot(coloc_counts, aes(x = mean_sample_n, y = num_coloc_pph4, color = mean_pph4, size = num_coloc_genes)) +
  geom_point() +
  geom_text_repel(aes(label = tissue), size = 2, color="black", max.overlaps = Inf) +
  scale_color_viridis_c(option="mako") +
  theme_classic() +
  labs(subtitle = paste("coloc thresh:", coloc_thresh),
       color = "Mean PPH4",
       size = "No. coloc genes",
       y = "No. colocalising BMI variants",
       x = "Sample size")

Effect of number of colocalising loci on MVMR result - 1

Simple simulation - hows does the number of variants used as an instrument effect the MVMR result. Let’s hold constant the number of colocalising variants in tissue 1 (set to median: 76) and then vary the number of variants in tissue 2. For now, let’s hold the PPH4 weighting for colocalising variants at constant 0.9 and the for the non-colocalising tissue variants something low, like 0.2. We see that the MVMR estimate flips when the second tissue has more variants than the first. The tissue with higher numbers of variants has the large MVMR estimate.

num_loci <- expand.grid(num_t1  = round(median(coloc_counts$num_coloc_pph4)), 
                        num_t2  = seq(min(coloc_counts$num_coloc_pph4), max(coloc_counts$num_coloc_pph4)),
                        pph4_hi = 0.9, 
                        pph4_lo = 0.2) |> as.data.table()

unique_num_loci <- sort(unique(c(num_loci$num_t1, num_loci$num_t2)))
unique_num_loci <- setNames(unique_num_loci, as.character(unique_num_loci))

loci_indicies <- lapply(unique_num_loci, function(num_l) {
  set.seed(123)
  sample(1:length(h_mvmr@snps), num_l, replace = FALSE)
})

mr_num_loci <- num_loci[, {
  h_tmp   <- copy(h_mvmr)
  t1_incides <- loci_indicies[[as.character(num_t1)]]
  t2_incides <- loci_indicies[[as.character(num_t2)]]
  h_tmp@index_snp <- ifelse(1:length(h_tmp@snps) %in% union(t1_incides, t2_incides), TRUE, FALSE) 
  h_tmp@bx[t1_incides,1] <- h_tmp@bx[t1_incides,1] * pph4_hi
  h_tmp@bx[t2_incides,2] <- h_tmp@bx[t2_incides,2] * pph4_hi
  h_tmp@bx[setdiff(t2_incides, t1_incides),1] <- h_tmp@bx[setdiff(t2_incides, t1_incides),1] * pph4_lo
  h_tmp@bx[setdiff(t1_incides, t2_incides),2] <- h_tmp@bx[setdiff(t1_incides, t2_incides),2] * pph4_lo
  h_tmp@exposure <- c("Tissue 1", "Tissue 2")
  r <- run_mr(h_tmp, methods="mr_ivw")
}, by = .(num_t1, num_t2)]

ggplot(mr_num_loci[, `:=`(b_lb = b-b_se*1.96, 
                          b_ub = b+b_se*1.96)],
       aes(x=num_t2, y=b, ymin=b_lb, ymax=b_ub, color=exposure)) +
  geom_errorbar(width=0.2, position = position_dodge(0.2)) +
  geom_point(position = position_dodge(0.2)) +
  theme_bw() +
  labs(y = "MVMR estimate (tpBMI vs HF)", 
       x = "Number of tissue 2 loci", 
       color = "Tissue", 
       subtitle = paste0("Number of loci for tissue 1 fixed at ", num_loci$num_t1[1]))

Effect of number of colocalising loci on MVMR result - 2

Now do the same but vary both the number of colocalising variants in tissue 1 and 2, and the percentage overlap of the instruments (grid search). We see that there has to be some difference in the number of colocalising loci and there needs to be some overlap of the loci in order to separate the MVMR results for each tissue.

num_loci <- expand.grid(num_t1  = seq(min(coloc_counts$num_coloc_pph4), max(coloc_counts$num_coloc_pph4), 50), 
                        num_t2  = seq(min(coloc_counts$num_coloc_pph4), max(coloc_counts$num_coloc_pph4), 50),
                        overlap = seq(0, 1, 0.2),
                        seed    = 2000:2100,
                        pph4_hi = 0.9, 
                        pph4_lo = 0.1) |> as.data.table()

library(future)
library(furrr)
plan(multisession)
mr_num_loci <- future_pmap_dfr(num_loci, function(num_t1, num_t2, overlap, seed, pph4_hi, pph4_lo) {
  
  h_tmp   <- copy(h_mvmr)
  
  # randomly sort the BMI SNP indices
  set.seed(seed)
  sram_idx <- sample.int(length(h_mvmr@snps))
  
  # indicies of the variants to use
  overlap_n <- round(min(num_t1, num_t2) * overlap)
  t1i       <- sram_idx[1:num_t1]
  t2i       <- sram_idx[(num_t1-overlap_n+1):(num_t2 + num_t1-overlap_n)]
  t12i      <- intersect(t1i, t2i)  # both tissues
  ut1i      <- setdiff(t1i, t12i)   # only tissue 1
  ut2i      <- setdiff(t2i, t12i)   # only tissue 2
  
  # which snps go into the instrument
  h_tmp@index_snp <- 1:length(h_mvmr@snps) %in% c(t12i, ut1i, ut2i)
  
  # weight tissue 1 betas
  h_tmp@bx[ut2i,1] <- h_tmp@bx[ut2i,1] * pph4_lo 
  h_tmp@bx[t1i,1]  <- h_tmp@bx[t1i,1] * pph4_hi 
  
  # weight tissue 2 betas
  h_tmp@bx[ut1i,2] <- h_tmp@bx[ut1i,2] * pph4_lo 
  h_tmp@bx[t2i,2]  <- h_tmp@bx[t2i,2] * pph4_hi 
  
  # naming
  h_tmp@exposure <- c("MV-beta tissue 1", "MV-beta tissue 2")
  
  # run mr
  r <- run_mr(h_tmp, methods="mr_ivw")
  
  r[, `:=`(num_t1   = num_t1, 
           num_t2   = num_t2, 
           num_diff = num_t1-num_t2, 
           overlap  = overlap, 
           seed     = seed, 
           pph4_lo  = pph4_lo, 
           pph4_hi  = pph4_hi)]
  
  return(r)
})


ggplot(mr_num_loci[, num_diff_lab := paste0("Delta~nSNP[T1-T2]:~ ", num_diff)], 
       aes(x=as.factor(overlap), y=b, fill=exposure, color=num_t1)) +
  geom_hline(yintercept = 0.0, linetype="dashed", color="darkgray") +
  geom_violin(width=1, position= position_dodge(width = 0.5), color=NA) +
  scale_x_discrete(labels = function(x) scales::percent(as.numeric(x))) +
  theme_classic() +
  theme(legend.position = "top") +
  labs(x = "Instrument SNP overlap (% of smaller instrument)", 
       y = "MVMR estimate", 
       fill = "Tissue") +
  facet_wrap(~num_diff_lab, 
             nrow = 1,
             labeller = label_parsed)

BMI:eQTL PPH4 distributions by tissue

Now lets look at the actual distributions of the PPH4 values in each tissue, since they are not fixed like in the above analyses. Take the maximum PPH4 from genes in close proximity to each variant, in each tissue and plot the distributions - these are the weights that would be used in the tissue-partitioned MR procedure.

num_tissues <- 6
tissues_subset <- levels(coloc_counts$tissue)[as.integer(seq(1, length(levels(coloc_counts$tissue)), length.out=num_tissues))] 
coloc_subset <- coloc_results[locus_trait=="bmi" & trait.x=="bmi" & (trait.y %in% tissues_subset) & grepl("protein_coding", gene_type.y), .SD[which.max(pph4)], by=.(locus_index_rsid,trait.y)]

density <- coloc_subset[, {
  den <- density(pph4, n=1000)
  .(pph4 = scales::rescale(den$x, 0:1), prob = den$y)
}, by = c(tissue="trait.y")][, tissue := factor(tissue, levels = levels(coloc_counts$tissue))]

ggplot(density, aes(x=pph4, y=prob, color=tissue)) +
  theme_classic() +
  geom_line() +
  labs(y = "Density")

Use of these distributions in simulations

If we want ‘real-world’ PPH4 weights for use in simulations we can just sample from these density distributions. Here is a quick check to show that sampling from them replicates the shape.

tissues <- expand.grid(tissue = unique(density$tissue), stringsAsFactors = F) |> as.data.table()

set.seed(123)

res_sim_density <- tissues[, {
  .(sim_pph4 = sample(density[tissue==.BY$tissue, pph4], round(length(h_mvmr@bx[,1])/2), prob = density[tissue==.BY$tissue, prob], replace = TRUE))
}, by = .(tissue)]


ggplot(res_sim_density, aes(x=sim_pph4, color=tissue)) +
  theme_classic() +
  geom_density() +
  labs(y = "Simulated density", 
       x = "Simulated PPH4", 
       color = "Tissue")

Effect different PPH4 weights

Weight the BMI betas by their respective maximum PPH4 value, see how this changes the relationship between the beta for BMI and beta for HF, dependent on which tissue is providing the betas. Tissues with greater or fewer colocalising variants affect push the betas up or down more, respectively. I hold one tissue constant (reference) to more easily appreciate how the other tissue’s colocalisation characteristics affect things.

ref_tissue   <- levels(density$tissue)[1]
tissue_pairs <- expand.grid(t1 = ref_tissue, t2 = unique(density$tissue), stringsAsFactors = F) |> as.data.table()

set.seed(123)
t1_pph4 <- sample(density[tissue==ref_tissue, pph4], length(h_mvmr@bx[,1]), prob = density[tissue==ref_tissue, prob], replace = TRUE)

res_weighted <- tissue_pairs[, {
  h_tmp <- h_mvmr
  t2_pph4 <- sample(density[tissue==.BY$t2, pph4], length(h_tmp@bx[,1]), prob = density[tissue==.BY$t2, prob], replace = TRUE)
  h_tmp@bx[,1] <- h_tmp@bx[,1] * t1_pph4
  h_tmp@bx[,2] <- h_tmp@bx[,2] * t2_pph4
  h_tmp@index_snp <- ifelse(t1_pph4>coloc_thresh | t2_pph4>coloc_thresh, TRUE, FALSE)
  .(tissue   = rep(c(paste0("Ref: ", ref_tissue),"Comparator"), each=sum(h_tmp@index_snp)), 
    beta_bmi = c(h_tmp@bx[h_tmp@index_snp,1], h_tmp@bx[h_tmp@index_snp,2]), 
    beta_hf  = rep(h_tmp@by[h_tmp@index_snp],2),
    mean_pph4_t1 = mean(t1_pph4[t1_pph4>coloc_thresh | t2_pph4>coloc_thresh]),
    mean_pph4_t2 = mean(t2_pph4[t1_pph4>coloc_thresh | t2_pph4>coloc_thresh]), 
    rmse_pph4    = sqrt(mean((t1_pph4[t1_pph4>coloc_thresh | t2_pph4>coloc_thresh] - t2_pph4[t1_pph4>coloc_thresh | t2_pph4>coloc_thresh])^2))
   )
}, by = .(t1,t2)]
res_weighted[, t2 := factor(t2, levels = unique(t2[order(-mean_pph4_t1)]))]

ggplot(res_weighted, aes(x=beta_bmi, y=beta_hf, color=tissue)) +
  geom_point() +
  geom_text(data = unique(res_weighted[grepl("Ref", tissue)], by="t2"), 
            aes(x = 0.0, 
                y = -0.04,
                label = sprintf("Tis mPPH4=%.3f", mean_pph4_t2)),
            color="black", size=2) +
  geom_text(data = unique(res_weighted[!grepl("Ref", tissue)], by="t2"), 
            aes(x = 0.0, 
                y = -0.07,
                label = sprintf("Ref mPPH4=%.3f", mean_pph4_t1)),
            color="black", size=2) +
  geom_smooth(method="lm", se=F,fullrange = TRUE) +
  facet_wrap(~t2, labeller = labeller(t2 = function(x) gsub("_", " ", x))) +
  labs(subtitle = paste("coloc thresh:", coloc_thresh), 
       y = "Heart failure beta", 
       x = "BMI beta (PPH4-weighted)", 
       color = "Tissue")

IVW regression by different PPH4 weighting distributions

Perform MVMR with the weighted BMI instruments. Lets look at varying the distributions that the PPH4 values are drawn from. Poorly colocalising tissues MVMR result is penalised against more highly colocalising tissues.

Function to weight betas and run MR

#' @title TPMR helper function
#' @param mrobj genepi.utils MR object (exposures BMI:BMI, outcome HF)
#' @param density data.table of PPH4 density distributions cols: tissue | pph4 | prob
#' @param ref character vector, tissues in density$tissue
#' @param comp character vector, tissues in density$tissue
#' @param coloc_thresh numeric 0-1
#' @param rep integer, number of replications (random sampling of BMI variants into instrument)
#' @returns data.table of MR results
#'
tpmr <- function(obj, ref, comp, coloc_thresh, rep = 1, density = NULL, pph4 = NULL) {
  
  # either density or actual PPH4 values
  stopifnot("Provide either denisty distributions or actual PPH4 values" = 
              sum(sapply(list(density, pph4), is.null)) == 1)
  stopifnot("Replications must be 1 if using actual PPH4 values" = ifelse(rep > 1 & !is.null(pph4), F, T))
              
  # run the MR for each of the exposure combinations
  tissue_pairs <- expand.grid(t1 = as.character(ref), 
                              t2 = as.character(comp),
                              seed = 2000:(2000+rep-1),
                              stringsAsFactors = F) |> as.data.table()
  
  library(future)
  library(furrr)
  plan(multisession)
  mr_weighted <- future_pmap_dfr(tissue_pairs, function(t1, t2, seed) {
    
    mrobj <- copy(obj)
    
    if (!is.null(density)) {
    
      # generate the PPH4 values randomly for the variants
      set.seed(seed)
      t1_pph4 <- sample(density[tissue==t1, pph4], length(mrobj@snps), prob = density[tissue==t1, prob], replace = TRUE)
      set.seed(seed+1)
      t2_pph4 <- sample(density[tissue==t2, pph4], length(mrobj@snps), prob = density[tissue==t2, prob], replace = TRUE)
     
    } else if(!is.null(pph4)) {
      
      chrpos <- data.table(chr = as.integer(mrobj@chr), bp = mrobj@bp, t1 = t1, t2 = t2)
      chrpos[pph4, t1.pph4 := i.pph4, on=c("chr"="chr","bp"="bp","t1"="tissue")]
      chrpos[pph4, t2.pph4 := i.pph4, on=c("chr"="chr","bp"="bp","t2"="tissue")]
      t1_pph4 <- chrpos$t1.pph4
      t2_pph4 <- chrpos$t2.pph4
      t1_pph4[is.na(t1_pph4)] <- 0
      t2_pph4[is.na(t2_pph4)] <- 0
      
    }
    
    # based on the distribution of PPH4, which SNPs are included
    include <- ifelse(t1_pph4 > coloc_thresh | t2_pph4 > coloc_thresh, TRUE, FALSE)
    
    # update the MR object, turn off the excluded snps (@index_snp slot)
    mrobj@bx[,1]      <- mrobj@bx[,1] * t1_pph4
    mrobj@bx[,2]      <- mrobj@bx[,2] * t2_pph4
    mrobj@index_snp   <- include
    mrobj@exposure[1] <- ifelse(length(ref)==1, paste0("Ref: ", ref_tissue), "Tissue 1")
    mrobj@exposure[2] <- ifelse(length(ref)==1, "Comparator", "Tissue 2")
    
    # run the MR
    r <- run_mr(mrobj, methods="mr_ivw")
    
    # add info about the instrument used
    r[, `:=`(t1             = t1, 
             t2             = t2,
             seed           = seed,
             n_sig_snps     = c(sum(t1_pph4 > coloc_thresh), sum(t2_pph4 > coloc_thresh)), 
             n_overlap_snps = sum(t1_pph4 > coloc_thresh & t2_pph4 > coloc_thresh),
             pct_t1_overlap = sum(t1_pph4 > coloc_thresh & t2_pph4 > coloc_thresh) / length(t1_pph4),
             mean_pph4_t1   = mean(t1_pph4[include]),
             mean_pph4_t2   = mean(t2_pph4[include]),
             rmse_pph4      = sqrt(mean((t1_pph4[include] - t2_pph4[include])^2)), 
             rmse_wbeta     = sqrt(mean((mrobj@bx[,1] - mrobj@bx[,2])^2)))]
    r[, c("overdispersion", "n_pc", "n_hunted", "slopehunter_pi", "slopehunter_ent") := NULL]
    
  })
  
  return(mr_weighted)
}

Different PPH4 distributions against each other

# run TPMR
mr_weighted <- tpmr(obj=h_mvmr, density=density, 
                    ref= unique(density$tissue), 
                    comp=unique(density$tissue), 
                    coloc_thresh=0.8, 
                    rep = 100)

# reorder for plotting 
mr_weighted[, mean_pph4_t1_lab := round(mean(mean_pph4_t1),2), by=.(t1)]
mr_weighted[, mean_pph4_t2_lab := round(mean(mean_pph4_t2),2), by=.(t2)]
mr_weighted[, mean_n_sig_snps := round(mean(mean_pph4_t2),0), by=.(t1,t2)]

ggplot(mr_weighted, 
       aes(x=as.factor(mean_pph4_t2_lab), y=b, color=exposure)) +
  geom_boxplot() +
  theme_bw() +
  labs(y = "MVMR estimate", x = "T2 mean PPH4", color = "Tissue") +
  facet_wrap(~ as.factor(mean_pph4_t1_lab), 
             labeller = labeller(`as.factor(mean_pph4_t1_lab)`=function(x) paste0("T1 mean PPH4: ",x)))

ggplot(mr_weighted[, .(b=mean(b)), by=.(t1,t2,exposure)][, .(b_diff=b[1]-b[2]), by=.(t1,t2)], 
       aes(y=t1, x=t2, fill=b_diff)) +
  geom_tile() +
  scale_fill_gradient2() +
  theme(axis.text.x  = element_text(angle=45, hjust=1)) +
  labs(y = "Tissue 1", x = "Tissue 2", fill = "T1-T2 beta diff")

All tissue pairs MR (actual PPH4 values)

Now let’s use the real data and do the all GTEx tissue against all other GTEx tissues. The poorly colocalising tissues have lower MVMR estimates against more highly coloclising tissues.

tissue_pph4_vals <- coloc_results[locus_trait=="bmi" & 
                                    trait.x=="bmi" & 
                                    gene_type.y=="protein_coding", 
                                  .SD[which.max(pph4)], 
                                  by=.(locus_index_rsid, tissue=trait.y), 
                                  .SDcols=c("chr", "bp", "pph4")]
tissue_pph4_vals <- tidyr::complete(tissue_pph4_vals, locus_index_rsid, tissue) |> as.data.table()

mr_actual <- tpmr(obj=h_mvmr, pph4=tissue_pph4_vals, 
                  ref= unique(tissue_pph4_vals$tissue), 
                  comp=unique(tissue_pph4_vals$tissue), 
                  coloc_thresh=0.8)

ggplot((mr_actual
        [coloc_counts, t1_n := i.mean_sample_n, on=c("t1"="tissue")]
        [coloc_counts, t2_n := i.mean_sample_n, on=c("t2"="tissue")]
        [, `:=`(b_diff     = b[exposure=="Tissue 1"]-b[exposure=="Tissue 2"],
                snp_diff   = n_sig_snps[exposure=="Tissue 1"]-n_sig_snps[exposure=="Tissue 2"],
                pph4_diff  = mean_pph4_t1-mean_pph4_t2, 
                nsamp_diff = t1_n - t2_n), by=.(t1,t2)]
        [, `:=`(t1 = factor(t1, levels = unique(t1[order(-b_diff)])),
                t2 = factor(t2, levels = unique(t1[order(-b_diff)])))]),
       aes(y=t1, x=t2, fill=b_diff)) +
  geom_tile() +
  scale_fill_gradient2() +
  theme(axis.text.x  = element_text(angle=45, hjust=1)) +
  labs(y = "Tissue 1", x = "Tissue 2", fill = "T1-T2 beta diff")

MVMR estimates depend on difference in instrument size (which also depends on difference in sample size)

Plot the different between MVMR estimates from a the 2-exposure MVMR procedure above - i.e. how separated are the effects attributed to each tissue. Is the magnitude of the separation related to the relative number of SNPs in each instrument, and does this depend on the sample size.

t1_highlight <- c("cortex", "frontal_cortex", "hippocampus", "hypothalamus")
t2_highlight <- c("subcut_adipose")
ggplot((mr_actual
        [coloc_counts, t1_n := i.mean_sample_n, on=c("t1"="tissue")]
        [coloc_counts, t2_n := i.mean_sample_n, on=c("t2"="tissue")]
        [, `:=`(b_diff     = b[exposure=="Tissue 1"]-b[exposure=="Tissue 2"],
                snp_diff   = n_sig_snps[exposure=="Tissue 1"]-n_sig_snps[exposure=="Tissue 2"],
                pph4_diff  = mean_pph4_t1-mean_pph4_t2, 
                nsamp_diff = t1_n - t2_n), by=.(t1,t2)]
       ),
       aes(y=b_diff, x=snp_diff, color=nsamp_diff)) +
  geom_hline(yintercept = 0.0, linetype="dotted", color="grey") +
  geom_vline(xintercept = 0.0, linetype="dotted", color="grey") +
  geom_point(alpha=0.3) +
  geom_label_repel(data = unique(mr_actual[t1 %in% t1_highlight & t2 %in% t2_highlight][, lab := paste0(t1,"/",t2)], by="lab"), aes(label=lab), max.overlaps=Inf, nudge_y=0.5, color="black") +
  scale_color_viridis_c(option="magma") +
  geom_smooth(method="lm", se=FALSE, color="red") +
  theme_classic2() +
  theme(legend.position = "top") +
  labs(y = expression(Delta[T1-T2]~MVMR~estimate), 
       x = expression(Delta[T1-T2]~No.~coloc~SNPs), 
       color = expression(Delta[T1-T2]~Tissue~sample~size))

Interpretation so far

Is the MVMR estimate mostly related to the difference in the relative number of colocalising SNPs in tissue A vs. tissue B? The tissue with fewer colocalising SNPs may be overpowered in the MVMR by the tissue with a greater number of colocalising SNPs, since number of colocalising SNPs is related to sample size of the eQTL dataset, is the MVMR estimate just a function of sample size? We not clearly, as despite the trend there are numerous tissues which show the reverse pattern - e.g. brain tissues where the sample size and number of colocalising loci is actually less than for subcutaneous adipose tissue, yet the MVMR result yeilds a larger contribution from the brain tissue instrument (i.e. the points live in the North-West quantrant of the plot above). Let’s look at why this happens.

Brain vs adipose tissue

Lets break down the characteristics of the brain and adipose tissue-partitioned BMI instruments.

Number of colocalising loci

ggplot((coloc_counts
        [tissue %in% c(t1_highlight, t2_highlight)]
        [, tissue_class := ifelse(tissue %in% t1_highlight, "Brain", "Adipose")]
        ), aes(x = tissue, y = `num_coloc_pph4`, fill = tissue_class)) +
  theme_classic2() +
  geom_col(alpha = 0.5) +
  labs(subtitle = paste("coloc thresh:", coloc_thresh), 
       y = "Number of colocalising BMI variants") +
  theme(axis.title.x = element_blank(), 
        axis.text.x = element_text(angle=45, hjust=1)) +
  guides(fill = "none") +
  facet_wrap(~tissue_class, scales = "free_x")

Distribution of PPH4

brain_adip_coloc <- coloc_results[locus_trait=="bmi" & trait.x=="bmi" & (trait.y %in% c(t1_highlight,t2_highlight)) & grepl("protein_coding", gene_type.y), .SD[which.max(pph4)], by=.(locus_index_rsid,trait.y)]
bmi_gwas_data <- as.data.table(h_mvmr)[, chr:= as.integer(chr)]
brain_adip_coloc[bmi_gwas_data, `:=`(bmi_beta = i.bx, weighted_bmi_beta = pph4*i.bx), on=.(chr,bp)]

ggplot(brain_adip_coloc, aes(x=pph4, color=trait.y)) +
  theme_classic() +
  geom_density() +
  labs(y = "Density", x = "PPH4", color="Tissue")

Distribution of weighted BMI betas (all)

ggplot(brain_adip_coloc[, locus_index_rsid:=factor(locus_index_rsid, levels=unique(locus_index_rsid[order(-weighted_bmi_beta)]))], 
       aes(x=locus_index_rsid, y=trait.y, fill=weighted_bmi_beta)) +
  theme_classic2() +
  geom_tile() +
  scale_fill_viridis_c() +
  labs(y = "Density", x = "Weighted BMI beta", color="Tissue")