
Libraries

    # load MASS before the tidyverse so that dplyr::select is not masked by MASS::select
    library(MASS)
    library(tidyverse)
    library(psych)
    library(sjPlot)
    library(ggthemes)
    library(ggplot2)
    library(purrr)
    library(broom)
    library(lm.beta)
    library(RColorBrewer)
    library(plotly)
    library(glmnet)
    library(glmnetUtils)

    source("http://www.labape.com.br/rprimi/R/cria_quartis.R")

Question

In multiple regression with X1...Xn predicting Y, each Bi or βi coefficient represents the unique effect of that variable on Y, controlling for all the other variables in the model. If β1 corresponds to the unique effect of X1, and X1 is correlated with X2, where does the shared effect go?
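Before the full simulation, here is a minimal sketch of what the question means in code (the correlations below are arbitrary illustration values, not taken from the simulation grid): when X1 and X2 are correlated, the simple slope of X1 carries the shared part, while the partial slope keeps only the unique part.

    # minimal sketch: simple vs. multiple regression slope of X1
    library(MASS)

    S <- matrix(c(1.0, 0.6, 0.5,
                  0.6, 1.0, 0.5,
                  0.5, 0.5, 1.0),
                nrow = 3,
                dimnames = list(c("X1", "X2", "Y"), c("X1", "X2", "Y")))

    d <- as.data.frame(mvrnorm(400, mu = c(0, 0, 0), Sigma = S, empirical = TRUE))

    coef(lm(Y ~ X1, data = d))        # simple slope: the full r_Y1 = .50
    coef(lm(Y ~ X1 + X2, data = d))   # partial slope: only the unique part of X1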

Data simulation

This code draws three variables, Y, X1, and X2, from a multivariate normal distribution with a specified correlation matrix var_cor.
It builds a grid of candidate correlation parameters and then simulates data with the mvrnorm function. Some combinations in the grid do not form a positive-definite correlation matrix; for those rows mvrnorm fails, so the call is wrapped in try() and the failed cases are filtered out afterwards.

  # grid of candidate correlations among Y, X1 and X2
  params <- expand.grid(
      r_Y1 = seq(0, .68, by = .05), 
      r_Y2 = seq(0, .68, by = .05),
      r_12 = seq(0, .88, by = .05)
  )
 
  simulate_3vars <- function(r_Y1, r_Y2, r_12, n = 400) {
    var_cor <- matrix(
      c(1.00,  r_12,  r_Y1,
        r_12,  1.00,  r_Y2, 
        r_Y1,  r_Y2,  1.00), 
      byrow = TRUE, nrow = 3, ncol = 3, 
      dimnames = list(
        c("X1", "X2", "Y"), 
        c("X1", "X2", "Y")
      ))
    # empirical = TRUE makes the sample correlations match var_cor exactly;
    # try() captures the error when var_cor is not positive definite
    df <- try(
      mvrnorm(n, mu = c(0, 0, 0), 
              Sigma = var_cor, empirical = TRUE) %>% as.data.frame(), 
      silent = TRUE) 
    return(df)
  }
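Because the grid combines each correlation independently, some rows imply an impossible correlation structure. A quick way to see how many rows will fail (a sketch with a hypothetical helper, not part of the original pipeline) is to check whether the implied matrix is positive definite:

  # hypothetical helper: TRUE when the implied correlation matrix is positive definite
  is_pd <- function(r_Y1, r_Y2, r_12) {
    m <- matrix(c(1,    r_12, r_Y1,
                  r_12, 1,    r_Y2,
                  r_Y1, r_Y2, 1), nrow = 3)
    min(eigen(m, only.values = TRUE)$values) > 0
  }

  params %>% mutate(pd = pmap_lgl(., is_pd)) %>% count(pd)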


  # simulate one dataset per row of the grid and record the class of the result
  # (a "try-error" when the correlation matrix was not positive definite)
  simulated_data  <- params %>% 
     mutate(
       data = pmap(., simulate_3vars),
       cl = map_chr(data, class)
     )
  
  
 
 simulated_data <- simulated_data %>%
     filter(cl == "data.frame") %>%
     mutate(
      mult_regres = map(data, ~lm(Y ~ X1 + X2, data = .)),
      # glmnetUtils supplies the formula interface to glmnet
      lasso_regres = map(data, ~glmnet(Y ~ X1 + X2, data = .)),
      beta = map(mult_regres, ~lm.beta(.)),
      beta2 = map(beta, "coefficients"),
      glance = map(mult_regres, glance)
     )

 
 
  simulated_data <- simulated_data %>%
     mutate(
      beta2 = map(beta2, t),
      beta2 = map(beta2, as.data.frame)
     )
 
  summar <- simulated_data %>% select(glance) %>% unnest(cols = glance) 
  betas <- simulated_data %>% select(beta2) %>% unnest(cols = beta2)
     
  # columns 1:3 are r_Y1, r_Y2, r_12; betas adds (Intercept), X1, X2
  df <- simulated_data %>% select(1:3) %>% bind_cols(betas, summar)

  names(df)[5:6] <- c("b_x1", "b_x2")
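
A quick sanity check on the assembled df (a sketch, not in the original script): when r_12 = 0 nothing is shared, so each beta should simply equal its zero-order correlation with Y.

  # with uncorrelated predictors, each beta equals its correlation with Y
  df %>% 
    filter(r_12 == 0) %>% 
    summarise(
      max_dev_x1 = max(abs(b_x1 - r_Y1)),
      max_dev_x2 = max(abs(b_x2 - r_Y2))
    )   # both deviations should be essentially zero
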
Results

As explained at https://stats.stackexchange.com/questions/24827/where-is-the-shared-variance-between-all-ivs-in-a-linear-multiple-regression-equ, when X1 and X2 have the same correlation with Y, as X1 and X2 “come closer and closer to being perfectly correlated, their b-values in the multiple regression come closer and closer to HALF of the b-value in the simple linear regression of either one of them. However, as X1 and X2 come closer and closer to being perfectly correlated, the STANDARD ERROR of b1 and b2 moves closer and closer to infinity, so the t-values converge on zero. So, the t-values will converge on zero (i.e., no UNIQUE linear relationship between either X1 and Y or X2 and Y), but the b-values converge to half the value of the b-values in the simple linear regression … so as the correlation between X1 and X2 approaches unity, EACH of the partial slope coefficients approaches contributing equally to the prediction of the Y value, even though neither independent variable offers any UNIQUE explanation of the dependent variable” (HTH // Phil, 2017)
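
For two predictors the standardized coefficients can be written directly in terms of the correlations (a standard result, stated here to make the limit explicit):

$$\beta_1 = \frac{r_{Y1} - r_{12}\, r_{Y2}}{1 - r_{12}^2}, \qquad
\beta_2 = \frac{r_{Y2} - r_{12}\, r_{Y1}}{1 - r_{12}^2}$$

With r_Y1 = r_Y2 = r both reduce to r / (1 + r_12), which tends to r/2 as r_12 approaches 1, exactly the “half of the simple slope” described above.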

This is shown in the simulation:

  df %>% filter(r_Y2 == .50 & r_Y1 == .50) %>%
    ggplot(aes(y=b_x1, x=b_x2, color = r_12, size=r_12)) + 
    geom_point(alpha=1/2)  +
    scale_colour_gradientn(colours = brewer.pal(7, "Paired")) +
    scale_y_continuous(breaks= seq(0.20, .55, by=.05), limits = c(.20,.55)) +
    scale_x_continuous(breaks= seq(0.20, .55, by=.05), limits = c(.20,.55)) +
    geom_vline(xintercept = .5, color = "orange") +
    geom_hline(yintercept = .5,  color = "orange") +
    theme_minimal()

Now look how crazy the βs get when r_Y1 > r_Y2 as r_12 approaches 1: the sign of b_x2 eventually flips. Here we are seeing Simpson’s paradox (see the sketch after the plot code below).

 df %>% filter(r_Y1 == .50 & r_Y2 == .25 ) %>%
    ggplot(aes(y=b_x1, x=b_x2, color = r_12, size=r_12)) + 
    geom_point(alpha=1/2)  +
    scale_colour_gradientn(colours = brewer.pal(7, "Paired")) +
    geom_vline(xintercept = .25, color = "orange") +
    geom_hline(yintercept = .50,  color = "orange") +
    scale_y_continuous(breaks= round(seq(.3, 1, by=.10), 2), limits = c(.3,1)) +
    scale_x_continuous(breaks= round(seq(-.6, .40, by=.10), 2), limits = c(-.6,.40)) +
    theme_minimal()
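
The same pattern can be checked without simulation, using the two-predictor formula above (a sketch with a hypothetical helper, not part of the original pipeline):

  # standardized betas from the correlations alone, here for r_Y1 = .50, r_Y2 = .25
  beta_from_r <- function(r_Y1, r_Y2, r_12) {
    tibble(
      r_12 = r_12,
      b_x1 = (r_Y1 - r_12 * r_Y2) / (1 - r_12^2),
      b_x2 = (r_Y2 - r_12 * r_Y1) / (1 - r_12^2)
    )
  }

  beta_from_r(.50, .25, seq(0, .85, by = .05))
  # b_x2 crosses zero once r_12 exceeds r_Y2 / r_Y1 = .5, while b_x1 keeps growing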

Now the same relationships as 3D graphs:

    colors <- brewer.pal(7, "Paired")
    
    df %>% filter(r_Y1 == .50 & r_Y2 == .25 ) %>%
      plot_ly(y=~b_x1, z=~b_x2, x=~r_12,
        type="scatter3d", 
        mode="markers", color=~r_12, 
          colors = colors,
        size = 3.5,
        opacity = 1/1.5)
   colors <- brewer.pal(7, "Paired")
    
    df %>% filter(r_Y1 == .50 & r_Y2 == .50 ) %>%
      plot_ly(y=~b_x1, z=~b_x2, x=~r_12,
        type="scatter3d", 
        mode="markers", color=~r_12, 
          colors = colors,
        size = 3.5,
        opacity = 1/1.5)

Lasso regression

# http://varianceexplained.org/broom-gallery/snippets/broom-glmnet.html
 

    tidy(simulated_data$lasso_regres[[1]])

    tidied <- tidy(simulated_data$lasso_regres[[1]]) %>% filter(term != "(Intercept)")
    
    ggplot(tidied, aes(step, estimate, group = term)) + geom_line()

    coefficients <- simulated_data$lasso_regres[[1]] %>%
     broom::tidy() %>%
     mutate(log_lambda = log(lambda)) %>%
     filter(term != "(Intercept)")
    
    plot(simulated_data$lasso_regres[[1]], xvar = "lambda", label = TRUE)
    
    ggplot(coefficients, aes(x=log_lambda, y=estimate, col=term)) +
     geom_line() + coord_cartesian(xlim = range(coefficients$log_lambda))
    
   
    tidied_cv <- tidy(simulated_data$lasso_regres[[1]])
    glance_cv <- glance(simulated_data$lasso_regres[[1]])
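
The tidied_cv / glance_cv names follow the broom gallery post, which works on a cross-validated fit. A sketch of that step (an assumption, not in the original) using cv.glmnet through the glmnetUtils formula interface on the first simulated dataset:

    # cross-validated lasso on one simulated dataset
    cv_fit <- cv.glmnet(Y ~ X1 + X2, data = simulated_data$data[[1]])

    tidy(cv_fit)     # cross-validation error estimate at each lambda
    glance(cv_fit)   # lambda.min and lambda.1se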