Introducción a R: Tablas chulas y comandos estadísticos

Acá hay un link con muchos ejemplos: kableExtra

JuveYell

library(gt)
#> 
#> Attaching package: 'gt'
#> The following object is masked from 'package:Hmisc':
#> 
#>     html

m_m %>%gt()

Especie	Familia	Longitud	Peso	Habitat
Foca-freddy	Phocidae	2.5	150	Ártico
Delfín-flipper	Delphinidae	3.2	200	Océano Atlántico
Ballena-migaloo	Balaenopteridae	18.7	2500	Océano Pacífico
León marino-aslan	Otariidae	2.1	180	Mar Mediterráneo
Orca-willy	Delphinidae	9.5	400	Océano Atlántico

library(gtExtras)
m_m %>%
  gt() %>%
  gt_hulk_col_numeric(Peso)

Especie	Familia	Longitud	Peso	Habitat
Foca-freddy	Phocidae	2.5	150	Ártico
Delfín-flipper	Delphinidae	3.2	200	Océano Atlántico
Ballena-migaloo	Balaenopteridae	18.7	2500	Océano Pacífico
León marino-aslan	Otariidae	2.1	180	Mar Mediterráneo
Orca-willy	Delphinidae	9.5	400	Océano Atlántico

m_m %>%
  gt() %>%
  gt_color_rows(Peso:Longitud, palette = c("white", "green"))

Especie	Familia	Longitud	Peso	Habitat
Foca-freddy	Phocidae	2.5	150	Ártico
Delfín-flipper	Delphinidae	3.2	200	Océano Atlántico
Ballena-migaloo	Balaenopteridae	18.7	2500	Océano Pacífico
León marino-aslan	Otariidae	2.1	180	Mar Mediterráneo
Orca-willy	Delphinidae	9.5	400	Océano Atlántico

head(m_m[,1:4]) %>% 
  gt() %>% 
  gt_highlight_rows(rows = 2, font_weight = "normal")

Especie	Familia	Longitud	Peso
Foca-freddy	Phocidae	2.5	150
Delfín-flipper	Delphinidae	3.2	200
Ballena-migaloo	Balaenopteridae	18.7	2500
León marino-aslan	Otariidae	2.1	180
Orca-willy	Delphinidae	9.5	400

m_m %>% 
  gt() %>% 
  gt_plt_bullet(column = Peso, target = Longitud)

Especie	Familia	Habitat
Foca-freddy	Phocidae	Ártico
Delfín-flipper	Delphinidae	Océano Atlántico
Ballena-migaloo	Balaenopteridae	Océano Pacífico
León marino-aslan	Otariidae	Mar Mediterráneo
Orca-willy	Delphinidae	Océano Atlántico

m_m%>%
  dplyr::group_by(Habitat) %>%
  dplyr::summarize(Tendencia = list(Longitud), .groups = "drop") %>%
  gt() %>%
  gt_plt_sparkline(Tendencia)
#> `geom_line()`: Each group consists of only one observation.
#> ℹ Do you need to adjust the group aesthetic?
#> `geom_line()`: Each group consists of only one observation.
#> ℹ Do you need to adjust the group aesthetic?
#> `geom_line()`: Each group consists of only one observation.
#> ℹ Do you need to adjust the group aesthetic?

Habitat	Tendencia
Mar Mediterráneo
Océano Atlántico
Océano Pacífico
Ártico

m_m %>%
  head() %>%
  dplyr::select(Familia, Longitud) %>%
  dplyr::mutate(Long_max = round(Longitud/max(Longitud), digits = 2),
                Long_min = round(Longitud/min(Longitud), digits = 2)) %>%
  dplyr::mutate(Long_cruda = Longitud) %>%
  gt() %>%
  gt_plt_bar_pct(column = Long_min, scaled = TRUE) %>%
  gt_plt_bar_pct(column = Long_cruda, scaled = FALSE, fill = "blue", background = "lightblue") %>%
  cols_align("center", contains("scale")) %>%
  cols_width(4 ~ px(125),
             5 ~ px(125))

Familia	Longitud	Long_max
Phocidae	2.5	0.13
Delphinidae	3.2	0.17
Balaenopteridae	18.7	1.00
Otariidae	2.1	0.11
Delphinidae	9.5	0.51


m_m %>%
  gt()%>%
  tab_header(title = "Mamíferos") %>%
  tab_style(style = list(cell_fill(color = "#b2f7ef"),
                           cell_text(weight = "bold")),
                 locations = cells_body(columns = Longitud))%>%
      tab_style(style = list(cell_fill(color = "#ffefb5"),
                               cell_text(weight = "bold")), 
                     locations = cells_body(columns = Peso))

Especie	Familia	Longitud	Peso	Habitat
Mamíferos
Foca-freddy	Phocidae	2.5	150	Ártico
Delfín-flipper	Delphinidae	3.2	200	Océano Atlántico
Ballena-migaloo	Balaenopteridae	18.7	2500	Océano Pacífico
León marino-aslan	Otariidae	2.1	180	Mar Mediterráneo
Orca-willy	Delphinidae	9.5	400	Océano Atlántico

JuveYell

library(gtExtras)
m_m %>% 
  gt() %>% 
  gt_theme_nytimes() %>% 
  tab_header(title = "Mamíferos")

Especie	Familia	Longitud	Peso	Habitat
Mamíferos
Foca-freddy	Phocidae	2.5	150	Ártico
Delfín-flipper	Delphinidae	3.2	200	Océano Atlántico
Ballena-migaloo	Balaenopteridae	18.7	2500	Océano Pacífico
León marino-aslan	Otariidae	2.1	180	Mar Mediterráneo
Orca-willy	Delphinidae	9.5	400	Océano Atlántico

Acá hay un link con muchos ejemplos: gtExtras

library(DT)

# Crear una tabla interactiva con búsqueda y ordenamiento utilizando DT
datatable(m_m)

Acá hay un link con muchos ejemplos: DT

JuveYell

library(formattable)
#> 
#> Attaching package: 'formattable'
#> The following object is masked from 'package:gt':
#> 
#>     currency
#> The following object is masked from 'package:plotly':
#> 
#>     style


tf<-formattable(m_m, list(
 Especie = color_bar("#e9c46a"),
                  Longitud = color_bar("#80ed99"),
                                   Peso = color_bar("#48cae4"),
                                                   Familia = color_bar("#f28482")))
print(tf)

formattable(m_m, list(
  Longitud= formatter("span",
                style=x~style(color=ifelse(rank(-x)<=3,"red","gray")),
                x~sprintf("%.4f (rank: %2d)", x, rank(-x))
  ),
  Peso= formatter("span",
                     style=x~style(color=ifelse(x,"green","red")),
                     x~icontext(ifelse(x,"ok","remove"),ifelse(x,"Delphinidae","Otariidae"))
  ),
  Habitat = formatter("span", style=x~ifelse(x=="Océano Atlántico",style(color="red",font.weight="bold"),NA)),
  Especie = color_bar("pink")
))

Especie	Familia	Longitud	Peso	Habitat
Foca-freddy	Phocidae	2.5000 (rank: 4)	Delphinidae	Ártico
Delfín-flipper	Delphinidae	3.2000 (rank: 3)	Delphinidae	Océano Atlántico
Ballena-migaloo	Balaenopteridae	18.7000 (rank: 1)	Delphinidae	Océano Pacífico
León marino-aslan	Otariidae	2.1000 (rank: 5)	Delphinidae	Mar Mediterráneo
Orca-willy	Delphinidae	9.5000 (rank: 2)	Delphinidae	Océano Atlántico

library(sparkline)
sparkline(c(1,2,7,6,5), type = "bar", barColor = "green")

Acá hay un link con muchos ejemplos: formattable

Comandos para relaizar test de Hipotésis

Test de hipótesis

JuveYell

Un factor (dos niveles) Test de t

¿Son todas las tasas de crecimiento diferentes entre los jóvenes y viejos?

H0: µ_viejo=µ_joven

Ha: µ_viejo≠µ_joven

t.test(y~x,data=datos, paired=FALSE, var.equal =TRUE)

t.test(y~x,data=datos, paired=FALSE, var.equal =FALSE)

Notese que var.equal= FALSE se utiliza si no tenemos homogeneidad de varianza

Un factor (varios niveles) ANOVA

¿Son todas las tasas de crecimiento iguales?

H0: µ_lento=µ_intermedio=µ_rapido

Ha: al menos un µ difiere

ANOVA

Variables continuas con propósito predictivos REGRESION

JuveYell

Supuestos en general

Normalidad
Homocedasticidad (dos opciones)
Independecia

Evaluación de los supuestos en análisis de dos muetras (test t)

Prueba de normalidad Shapiro-Wilk

Hipótesis

H0: Las variables siguen distribución normal

Ha: Las variables no siguen distribución normal

Prueba de normalidad Shapiro-wilk para las variables respuesta

#> 
#>  F test to compare two variances
#> 
#> data:  growth_rate by edad_g
#> F = 0.69065, num df = 8, denom df = 38, p-value = 0.6057
#> alternative hypothesis: true ratio of variances is not equal to 1
#> 95 percent confidence interval:
#>  0.2710721 2.6578969
#> sample estimates:
#> ratio of variances 
#>          0.6906472

Gráficamente

p <- datos16 %>%
  ggplot( aes(x=growth_rate, fill=edad_g)) +
    geom_histogram( color="#e9ecef", alpha=0.6, position = 'identity') +
    scale_fill_manual(values=c("#69b3a2", "#404080")) +
    geom_line(stat = "density")+
    facet_grid(~edad_g)
    labs(fill="")
#> $fill
#> [1] ""
#> 
#> attr(,"class")
#> [1] "labels"
p
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Estadísticas en Tabla

#> 
#> Call:
#> lm(formula = growth_rate ~ type_g, data = datos16)
#> 
#> Residuals:
#>       Min        1Q    Median        3Q       Max 
#> -0.245303 -0.060828 -0.005251  0.054704  0.195197 
#> 
#> Coefficients:
#>              Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)   2.13390    0.04295  49.682  < 2e-16 ***
#> type_gLow    -0.43230    0.04649  -9.299 4.81e-12 ***
#> type_gMedium -0.18777    0.05853  -3.208  0.00246 ** 
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 0.1052 on 45 degrees of freedom
#> Multiple R-squared:  0.6984, Adjusted R-squared:  0.685 
#> F-statistic: 52.11 on 2 and 45 DF,  p-value: 1.931e-12

#>             Df Sum Sq Mean Sq F value   Pr(>F)    
#> type_g       2 1.1536  0.5768   52.11 1.93e-12 ***
#> Residuals   45 0.4981  0.0111                     
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Plot para ver supuestos

# Modelo con lm
lmanov<- lm(growth_rate~type_g, datos16)
par(mfrow=c(2,2)) 
plot(lmanov)

# Modelo con aov
anov<- aov(growth_rate~type_g, datos16)
par(mfrow=c(2,2)) 
plot(anov)

ggqqplot(datos16, "growth_rate", facet.by = c("type_g"))

Test para ver homocedasticidad

var.test(growth_rate ~edad_g, data = datos16)
#> 
#>  F test to compare two variances
#> 
#> data:  growth_rate by edad_g
#> F = 0.69065, num df = 8, denom df = 38, p-value = 0.6057
#> alternative hypothesis: true ratio of variances is not equal to 1
#> 95 percent confidence interval:
#>  0.2710721 2.6578969
#> sample estimates:
#> ratio of variances 
#>          0.6906472

Salida final para interpretar estadísticos

summary(lmanov)
#> 
#> Call:
#> lm(formula = growth_rate ~ type_g, data = datos16)
#> 
#> Residuals:
#>       Min        1Q    Median        3Q       Max 
#> -0.245303 -0.060828 -0.005251  0.054704  0.195197 
#> 
#> Coefficients:
#>              Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)   2.13390    0.04295  49.682  < 2e-16 ***
#> type_gLow    -0.43230    0.04649  -9.299 4.81e-12 ***
#> type_gMedium -0.18777    0.05853  -3.208  0.00246 ** 
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 0.1052 on 45 degrees of freedom
#> Multiple R-squared:  0.6984, Adjusted R-squared:  0.685 
#> F-statistic: 52.11 on 2 and 45 DF,  p-value: 1.931e-12
anova(lmanov)

summary(anov)
#>             Df Sum Sq Mean Sq F value   Pr(>F)    
#> type_g       2 1.1536  0.5768   52.11 1.93e-12 ***
#> Residuals   45 0.4981  0.0111                     
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Posthoc (pueden revisar la library(emmeans))

Muchos ejemplos de diferentes tipos de posthoc emmeans

t <-TukeyHSD(anov)
print(t)
#>   Tukey multiple comparisons of means
#>     95% family-wise confidence level
#> 
#> Fit: aov(formula = growth_rate ~ type_g, data = datos16)
#> 
#> $type_g
#>                   diff        lwr         upr     p adj
#> Low-High    -0.4322971 -0.5449630 -0.31963125 0.0000000
#> Medium-High -0.1877714 -0.3296305 -0.04591233 0.0068283
#> Medium-Low   0.2445257  0.1389530  0.35009845 0.0000035

Ahora ya podemos hacer un plot incorporando posthoc


boxplot(growth_rate~ type_g,data=datos16, ylim=c(1.3,2.6),
        xlab="Type", ylab="growth_rate", col=(c("gold","darkgreen", "grey")))
tp <- extract_p(t)
groups2 <- multcompLetters(t$type_g[,4])
lets <- groups2$Letters[c(4,1:3)]
text(1:3, 2.6 ,lets)

Otras opciones

aov.test <- datos16 %>%
  anova_test(growth_rate~ type_g)
print(aov.test)
#> ANOVA Table (type II tests)
#> 
#>   Effect DFn DFd      F        p p<.05   ges
#> 1 type_g   2  45 52.113 1.93e-12     * 0.698

datos16 %>%
  levene_test(growth_rate~ type_g)

Post hoc númerico

datos16 %>% tukey_hsd(growth_rate~ type_g)

pwc <- datos16 %>% 
  tukey_hsd(growth_rate~ type_g)
pwc

print(t)
#>   Tukey multiple comparisons of means
#>     95% family-wise confidence level
#> 
#> Fit: aov(formula = growth_rate ~ type_g, data = datos16)
#> 
#> $type_g
#>                   diff        lwr         upr     p adj
#> Low-High    -0.4322971 -0.5449630 -0.31963125 0.0000000
#> Medium-High -0.1877714 -0.3296305 -0.04591233 0.0068283
#> Medium-Low   0.2445257  0.1389530  0.35009845 0.0000035

Post hoc gráfico

pwc <- pwc %>% 
  add_xy_position(x = "type_g")
ggboxplot(datos16, x = "type_g", y = "growth_rate", color = "type_g", palette = "jco", add = "jitter") +
  stat_pvalue_manual(pwc, hide.ns = TRUE) +
  labs(subtitle = get_test_label(aov.test, detailed = TRUE),
    caption = get_pwc_label(pwc))

library(stargazer)

stargazer(lmanov, type = "text",
          digits = 3,
          star.cutoffs = c(0.05, 0.01, 0.001),
          digit.separator = "")

Regresión

Evaluación de los Supuestos del modelo Regresión

Atención a diferencia del ejemplo anterior (t test) acá se parte del modelos (porque necesitamos los residuales) para evaluar los supuestos

m <- lm(growth_rate~age,data=datos16)
summary(m)
#> 
#> Call:
#> lm(formula = growth_rate ~ age, data = datos16)
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -0.2712 -0.1221 -0.0185  0.1112  0.3940 
#> 
#> Coefficients:
#>              Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)  2.595251   0.200256  12.960  < 2e-16 ***
#> age         -0.002300   0.000569  -4.043 0.000199 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 0.1628 on 46 degrees of freedom
#> Multiple R-squared:  0.2621, Adjusted R-squared:  0.2461 
#> F-statistic: 16.34 on 1 and 46 DF,  p-value: 0.0001994
anova(m)

Se miran gráficamente

autoplot(m)

Gráfica de los resultados

ggscatter(datos, x = "age", y = "growth_rate",
          add = "reg.line",                             
          conf.int = TRUE,                                 
          add.params = list(color = "black",
                            fill = "lightgray")
          )+
  stat_cor(method = "pearson", label.x =270, label.y = 2.5)

JuveYell

Tablas para test

library(Hmisc)
library(tables)
stderr <- function(x) sd(x)/sqrt(length(x))
latex(
  object = tabular((Species+1) ~ All(iris)* PlusMinus(mean, stderr, digits=1), data=iris)
 , title = "Test"
 , file=""
 , size = "small"
 , cdot = 3
 , here = TRUE
 , booktabs=TRUE
 , center="centering"
 )

JuveYell

library(formattable)
datost<- read.csv("/home/kechu/Documentos/Curso_R/tabla.csv")
formattable(datost, 
            align = c("l",rep("r", NCOL(datost) - 1)),
            list(`Opciones` = formatter("span", style = ~ style(color = "grey", font.weight = "bold")), 
                 `Niveles` = formatter("span", style = ~ style(color = "grey", font.weight = "bold")), 
                 `Test_T`=color_bar("#B1CBEB"),
                 `Anova` =color_bar("#FA614B"),
                 `Regresión`=color_bar("#71CA97"),
                 `Ancova` =color_bar("#3E7DCC")))

Tipo	Niveles	Test_T	Anova	Regresión	Ancova
	2	x
Modelo	3,4,5….		x
	continúa			x
	Factor+continúa				x

	Independencia	x	x	x	x
Supuestos	Normalidad	x	x	x	x
	Homocedasticidad	x(*)	x(*)	x	x
	Línealidad			x	x

(*) Hay comandos alternativos fáciles para resolver el incumplimiento del supuesto

JuveYell

library(gtsummary)


table1 <- 
  datos16 %>%
  tbl_summary(include = c(growth_rate, type_g, edad_g))
print(table1)

table2 <-
  tbl_summary(
    datos16,
    include = c(growth_rate,standard_length, body_depth, age),
    by = type_g, # split table by group
    missing = "no" # don't list missing data separately
  ) %>%
  add_n() %>% # add column with total number of non-missing observations
  add_p() %>% # test for a difference between groups
  modify_header(label = "**Variable**") %>% # update the column header
  bold_labels()
print(table2)

mod1 <- lm(growth_rate ~ body_depth + age + type_g, datos16)

t1 <- tbl_regression(mod1)

print(t1)

Gráficos animados!

JuveYell

p1 <- datos16 %>% 
  ggplot(aes(age, growth_rate, size = standard_length, color = edad_g)) +
  geom_point() +
  labs(x="Tasa de crecimeinto", y="Edad (días)", title="Parametros corporales")+
  theme_cowplot()
p1

p1_animation<- p1 +
  transition_time(year) +
  labs(subtitle = "Year")
p1_animation

p1_animation <- p1 +
  transition_time(year) +
  labs(subtitle = "Year") +
  shadow_wake(wake_length = 0.1, alpha = FALSE)
p1_animation

p1_animation_2 <- p1 +
  geom_text(aes(x = min(age), y = min(growth_rate), label = as.factor(year)) , 
            hjust=-1.5, vjust = -0.2, alpha = 0.2,  col = "gray", size = 20) +
  transition_states(as.factor(year), state_length = 0)
p1_animation_2

p <- ggplot(datos16, aes(age, growth_rate, color = edad_g)) +
  geom_point(aes(size = standard_length, frame = year)) +
  labs(x="Tasa de crecimeinto", y="Edad (días)", title="Parametros corporales")+
theme_cowplot()
ggplotly(p)

Introducción a R: Tablas chulas y comandos estadísticos

2023

Acá hay un link con muchos ejemplos: kableExtra

Acá hay un link con muchos ejemplos: gtExtras

Acá hay un link con muchos ejemplos: DT

Acá hay un link con muchos ejemplos: formattable

Comandos para relaizar test de Hipotésis

Test de hipótesis

Un factor (varios niveles) ANOVA

Variables continuas con propósito predictivos REGRESION

Supuestos en general

Evaluación de los supuestos en análisis de dos muetras (test t)

Prueba de normalidad Shapiro-Wilk

Gráficamente

Estadísticas en Tabla

Plot para ver supuestos

Test para ver homocedasticidad

Salida final para interpretar estadísticos

Posthoc (pueden revisar la library(emmeans))

Ahora ya podemos hacer un plot incorporando posthoc

Otras opciones

Regresión

Tablas para test

Gráficos animados!