Vamos a generar un conjunto de datos llamado “m_m” que contiene información sobre diferentes especies de mamíferos marinos. Queremos realizar diferentes tipos de tablas:
m_m <- data.frame(
Especie = c("Foca-freddy", "Delfín-flipper", "Ballena-migaloo", "León marino-aslan", "Orca-willy"),
Familia = c("Phocidae", "Delphinidae", "Balaenopteridae", "Otariidae", "Delphinidae"),
Longitud = c(2.5, 3.2, 18.7, 2.1, 9.5),
Peso = c(150, 200, 2500, 180, 400),
Habitat = c("Ártico", "Océano Atlántico", "Océano Pacífico", "Mar Mediterráneo", "Océano Atlántico")
)
head(m_m)
-Especie: Nombre en la mayoría de los casos falso.
-Familia: Relacionada a la especie.
-Longitud: Longitud del animal en “metros”.
-Peso: Peso del animal en “kg”.
-Habitat: Zona donde habita el animal.
# Instalar el paquete knitr
#install.packages("knitr")
library(knitr)
# Crear una tabla con formato utilizando knitr
kable(m_m, format = "html", caption = "Mamíferos")
| Especie | Familia | Longitud | Peso | Habitat |
|---|---|---|---|---|
| Foca-freddy | Phocidae | 2.5 | 150 | Ártico |
| Delfín-flipper | Delphinidae | 3.2 | 200 | Océano Atlántico |
| Ballena-migaloo | Balaenopteridae | 18.7 | 2500 | Océano Pacífico |
| León marino-aslan | Otariidae | 2.1 | 180 | Mar Mediterráneo |
| Orca-willy | Delphinidae | 9.5 | 400 | Océano Atlántico |
library(gt)
#>
#> Attaching package: 'gt'
#> The following object is masked from 'package:Hmisc':
#>
#> html
m_m %>%gt()
| Especie | Familia | Longitud | Peso | Habitat |
|---|---|---|---|---|
| Foca-freddy | Phocidae | 2.5 | 150 | Ártico |
| Delfín-flipper | Delphinidae | 3.2 | 200 | Océano Atlántico |
| Ballena-migaloo | Balaenopteridae | 18.7 | 2500 | Océano Pacífico |
| León marino-aslan | Otariidae | 2.1 | 180 | Mar Mediterráneo |
| Orca-willy | Delphinidae | 9.5 | 400 | Océano Atlántico |
library(gtExtras)
m_m %>%
gt() %>%
gt_hulk_col_numeric(Peso)
| Especie | Familia | Longitud | Peso | Habitat |
|---|---|---|---|---|
| Foca-freddy | Phocidae | 2.5 | 150 | Ártico |
| Delfín-flipper | Delphinidae | 3.2 | 200 | Océano Atlántico |
| Ballena-migaloo | Balaenopteridae | 18.7 | 2500 | Océano Pacífico |
| León marino-aslan | Otariidae | 2.1 | 180 | Mar Mediterráneo |
| Orca-willy | Delphinidae | 9.5 | 400 | Océano Atlántico |
m_m %>%
gt() %>%
gt_color_rows(Peso:Longitud, palette = c("white", "green"))
| Especie | Familia | Longitud | Peso | Habitat |
|---|---|---|---|---|
| Foca-freddy | Phocidae | 2.5 | 150 | Ártico |
| Delfín-flipper | Delphinidae | 3.2 | 200 | Océano Atlántico |
| Ballena-migaloo | Balaenopteridae | 18.7 | 2500 | Océano Pacífico |
| León marino-aslan | Otariidae | 2.1 | 180 | Mar Mediterráneo |
| Orca-willy | Delphinidae | 9.5 | 400 | Océano Atlántico |
head(m_m[,1:4]) %>%
gt() %>%
gt_highlight_rows(rows = 2, font_weight = "normal")
| Especie | Familia | Longitud | Peso |
|---|---|---|---|
| Foca-freddy | Phocidae | 2.5 | 150 |
| Delfín-flipper | Delphinidae | 3.2 | 200 |
| Ballena-migaloo | Balaenopteridae | 18.7 | 2500 |
| León marino-aslan | Otariidae | 2.1 | 180 |
| Orca-willy | Delphinidae | 9.5 | 400 |
m_m %>%
gt() %>%
gt_plt_bullet(column = Peso, target = Longitud)
| Especie | Familia | Peso | Habitat |
|---|---|---|---|
| Foca-freddy | Phocidae | Ártico | |
| Delfín-flipper | Delphinidae | Océano Atlántico | |
| Ballena-migaloo | Balaenopteridae | Océano Pacífico | |
| León marino-aslan | Otariidae | Mar Mediterráneo | |
| Orca-willy | Delphinidae | Océano Atlántico |
m_m%>%
dplyr::group_by(Habitat) %>%
dplyr::summarize(Tendencia = list(Longitud), .groups = "drop") %>%
gt() %>%
gt_plt_sparkline(Tendencia)
#> `geom_line()`: Each group consists of only one observation.
#> ℹ Do you need to adjust the group aesthetic?
#> `geom_line()`: Each group consists of only one observation.
#> ℹ Do you need to adjust the group aesthetic?
#> `geom_line()`: Each group consists of only one observation.
#> ℹ Do you need to adjust the group aesthetic?
| Habitat | Tendencia |
|---|---|
| Mar Mediterráneo | |
| Océano Atlántico | |
| Océano Pacífico | |
| Ártico |
m_m %>%
head() %>%
dplyr::select(Familia, Longitud) %>%
dplyr::mutate(Long_max = round(Longitud/max(Longitud), digits = 2),
Long_min = round(Longitud/min(Longitud), digits = 2)) %>%
dplyr::mutate(Long_cruda = Longitud) %>%
gt() %>%
gt_plt_bar_pct(column = Long_min, scaled = TRUE) %>%
gt_plt_bar_pct(column = Long_cruda, scaled = FALSE, fill = "blue", background = "lightblue") %>%
cols_align("center", contains("scale")) %>%
cols_width(4 ~ px(125),
5 ~ px(125))
| Familia | Longitud | Long_max | Long_min | Long_cruda |
|---|---|---|---|---|
| Phocidae | 2.5 | 0.13 | ||
| Delphinidae | 3.2 | 0.17 | ||
| Balaenopteridae | 18.7 | 1.00 | ||
| Otariidae | 2.1 | 0.11 | ||
| Delphinidae | 9.5 | 0.51 |
m_m %>%
gt()%>%
tab_header(title = "Mamíferos") %>%
tab_style(style = list(cell_fill(color = "#b2f7ef"),
cell_text(weight = "bold")),
locations = cells_body(columns = Longitud))%>%
tab_style(style = list(cell_fill(color = "#ffefb5"),
cell_text(weight = "bold")),
locations = cells_body(columns = Peso))
| Mamíferos | ||||
| Especie | Familia | Longitud | Peso | Habitat |
|---|---|---|---|---|
| Foca-freddy | Phocidae | 2.5 | 150 | Ártico |
| Delfín-flipper | Delphinidae | 3.2 | 200 | Océano Atlántico |
| Ballena-migaloo | Balaenopteridae | 18.7 | 2500 | Océano Pacífico |
| León marino-aslan | Otariidae | 2.1 | 180 | Mar Mediterráneo |
| Orca-willy | Delphinidae | 9.5 | 400 | Océano Atlántico |
library(gtExtras)
m_m %>%
gt() %>%
gt_theme_nytimes() %>%
tab_header(title = "Mamíferos")
| Mamíferos | ||||
| Especie | Familia | Longitud | Peso | Habitat |
|---|---|---|---|---|
| Foca-freddy | Phocidae | 2.5 | 150 | Ártico |
| Delfín-flipper | Delphinidae | 3.2 | 200 | Océano Atlántico |
| Ballena-migaloo | Balaenopteridae | 18.7 | 2500 | Océano Pacífico |
| León marino-aslan | Otariidae | 2.1 | 180 | Mar Mediterráneo |
| Orca-willy | Delphinidae | 9.5 | 400 | Océano Atlántico |
library(DT)
# Crear una tabla interactiva con búsqueda y ordenamiento utilizando DT
datatable(m_m)
library(formattable)
#>
#> Attaching package: 'formattable'
#> The following object is masked from 'package:gt':
#>
#> currency
#> The following object is masked from 'package:plotly':
#>
#> style
tf<-formattable(m_m, list(
Especie = color_bar("#e9c46a"),
Longitud = color_bar("#80ed99"),
Peso = color_bar("#48cae4"),
Familia = color_bar("#f28482")))
print(tf)
formattable(m_m, list(
Longitud= formatter("span",
style=x~style(color=ifelse(rank(-x)<=3,"red","gray")),
x~sprintf("%.4f (rank: %2d)", x, rank(-x))
),
Peso= formatter("span",
style=x~style(color=ifelse(x,"green","red")),
x~icontext(ifelse(x,"ok","remove"),ifelse(x,"Delphinidae","Otariidae"))
),
Habitat = formatter("span", style=x~ifelse(x=="Océano Atlántico",style(color="red",font.weight="bold"),NA)),
Especie = color_bar("pink")
))
| Especie | Familia | Longitud | Peso | Habitat |
|---|---|---|---|---|
| Foca-freddy | Phocidae | 2.5000 (rank: 4) | Delphinidae | Ártico |
| Delfín-flipper | Delphinidae | 3.2000 (rank: 3) | Delphinidae | Océano Atlántico |
| Ballena-migaloo | Balaenopteridae | 18.7000 (rank: 1) | Delphinidae | Océano Pacífico |
| León marino-aslan | Otariidae | 2.1000 (rank: 5) | Delphinidae | Mar Mediterráneo |
| Orca-willy | Delphinidae | 9.5000 (rank: 2) | Delphinidae | Océano Atlántico |
library(sparkline)
sparkline(c(1,2,7,6,5), type = "bar", barColor = "green")
Un factor (dos niveles) Test de t
¿Son todas las tasas de crecimiento diferentes entre los jóvenes y viejos?
H0: µ_viejo=µ_joven
Ha: µ_viejo≠µ_joven
t.test(y~x,data=datos, paired=FALSE, var.equal =TRUE)
t.test(y~x,data=datos, paired=FALSE, var.equal =FALSE)
Notese que var.equal= FALSE se utiliza si no tenemos
homogeneidad de varianza
¿Son todas las tasas de crecimiento iguales?
H0: µ_lento=µ_intermedio=µ_rapido
Ha: al menos un µ difiere
Normalidad
Homocedasticidad (dos opciones)
Independecia
Hipótesis
H0: Las variables siguen distribución normal
Ha: Las variables no siguen
distribución normal
Prueba de normalidad Shapiro-wilk para las variables respuesta
#>
#> F test to compare two variances
#>
#> data: growth_rate by edad_g
#> F = 0.69065, num df = 8, denom df = 38, p-value = 0.6057
#> alternative hypothesis: true ratio of variances is not equal to 1
#> 95 percent confidence interval:
#> 0.2710721 2.6578969
#> sample estimates:
#> ratio of variances
#> 0.6906472
p <- datos16 %>%
ggplot( aes(x=growth_rate, fill=edad_g)) +
geom_histogram( color="#e9ecef", alpha=0.6, position = 'identity') +
scale_fill_manual(values=c("#69b3a2", "#404080")) +
geom_line(stat = "density")+
facet_grid(~edad_g)
labs(fill="")
#> $fill
#> [1] ""
#>
#> attr(,"class")
#> [1] "labels"
p
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#>
#> Call:
#> lm(formula = growth_rate ~ type_g, data = datos16)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -0.245303 -0.060828 -0.005251 0.054704 0.195197
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 2.13390 0.04295 49.682 < 2e-16 ***
#> type_gLow -0.43230 0.04649 -9.299 4.81e-12 ***
#> type_gMedium -0.18777 0.05853 -3.208 0.00246 **
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.1052 on 45 degrees of freedom
#> Multiple R-squared: 0.6984, Adjusted R-squared: 0.685
#> F-statistic: 52.11 on 2 and 45 DF, p-value: 1.931e-12
#> Df Sum Sq Mean Sq F value Pr(>F)
#> type_g 2 1.1536 0.5768 52.11 1.93e-12 ***
#> Residuals 45 0.4981 0.0111
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Modelo con lm
lmanov<- lm(growth_rate~type_g, datos16)
par(mfrow=c(2,2))
plot(lmanov)
# Modelo con aov
anov<- aov(growth_rate~type_g, datos16)
par(mfrow=c(2,2))
plot(anov)
ggqqplot(datos16, "growth_rate", facet.by = c("type_g"))
var.test(growth_rate ~edad_g, data = datos16)
#>
#> F test to compare two variances
#>
#> data: growth_rate by edad_g
#> F = 0.69065, num df = 8, denom df = 38, p-value = 0.6057
#> alternative hypothesis: true ratio of variances is not equal to 1
#> 95 percent confidence interval:
#> 0.2710721 2.6578969
#> sample estimates:
#> ratio of variances
#> 0.6906472
summary(lmanov)
#>
#> Call:
#> lm(formula = growth_rate ~ type_g, data = datos16)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -0.245303 -0.060828 -0.005251 0.054704 0.195197
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 2.13390 0.04295 49.682 < 2e-16 ***
#> type_gLow -0.43230 0.04649 -9.299 4.81e-12 ***
#> type_gMedium -0.18777 0.05853 -3.208 0.00246 **
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.1052 on 45 degrees of freedom
#> Multiple R-squared: 0.6984, Adjusted R-squared: 0.685
#> F-statistic: 52.11 on 2 and 45 DF, p-value: 1.931e-12
anova(lmanov)
summary(anov)
#> Df Sum Sq Mean Sq F value Pr(>F)
#> type_g 2 1.1536 0.5768 52.11 1.93e-12 ***
#> Residuals 45 0.4981 0.0111
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Muchos ejemplos de diferentes tipos de posthoc emmeans
t <-TukeyHSD(anov)
print(t)
#> Tukey multiple comparisons of means
#> 95% family-wise confidence level
#>
#> Fit: aov(formula = growth_rate ~ type_g, data = datos16)
#>
#> $type_g
#> diff lwr upr p adj
#> Low-High -0.4322971 -0.5449630 -0.31963125 0.0000000
#> Medium-High -0.1877714 -0.3296305 -0.04591233 0.0068283
#> Medium-Low 0.2445257 0.1389530 0.35009845 0.0000035
boxplot(growth_rate~ type_g,data=datos16, ylim=c(1.3,2.6),
xlab="Type", ylab="growth_rate", col=(c("gold","darkgreen", "grey")))
tp <- extract_p(t)
groups2 <- multcompLetters(t$type_g[,4])
lets <- groups2$Letters[c(4,1:3)]
text(1:3, 2.6 ,lets)
aov.test <- datos16 %>%
anova_test(growth_rate~ type_g)
print(aov.test)
#> ANOVA Table (type II tests)
#>
#> Effect DFn DFd F p p<.05 ges
#> 1 type_g 2 45 52.113 1.93e-12 * 0.698
datos16 %>%
levene_test(growth_rate~ type_g)
Post hoc númerico
datos16 %>% tukey_hsd(growth_rate~ type_g)
pwc <- datos16 %>%
tukey_hsd(growth_rate~ type_g)
pwc
print(t)
#> Tukey multiple comparisons of means
#> 95% family-wise confidence level
#>
#> Fit: aov(formula = growth_rate ~ type_g, data = datos16)
#>
#> $type_g
#> diff lwr upr p adj
#> Low-High -0.4322971 -0.5449630 -0.31963125 0.0000000
#> Medium-High -0.1877714 -0.3296305 -0.04591233 0.0068283
#> Medium-Low 0.2445257 0.1389530 0.35009845 0.0000035
Post hoc gráfico
pwc <- pwc %>%
add_xy_position(x = "type_g")
ggboxplot(datos16, x = "type_g", y = "growth_rate", color = "type_g", palette = "jco", add = "jitter") +
stat_pvalue_manual(pwc, hide.ns = TRUE) +
labs(subtitle = get_test_label(aov.test, detailed = TRUE),
caption = get_pwc_label(pwc))
library(stargazer)
stargazer(lmanov, type = "text",
digits = 3,
star.cutoffs = c(0.05, 0.01, 0.001),
digit.separator = "")
Atención a diferencia del ejemplo anterior (t test) acá se parte del modelos (porque necesitamos los residuales) para evaluar los supuestos
m <- lm(growth_rate~age,data=datos16)
summary(m)
#>
#> Call:
#> lm(formula = growth_rate ~ age, data = datos16)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -0.2712 -0.1221 -0.0185 0.1112 0.3940
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 2.595251 0.200256 12.960 < 2e-16 ***
#> age -0.002300 0.000569 -4.043 0.000199 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.1628 on 46 degrees of freedom
#> Multiple R-squared: 0.2621, Adjusted R-squared: 0.2461
#> F-statistic: 16.34 on 1 and 46 DF, p-value: 0.0001994
anova(m)
Se miran gráficamente
autoplot(m)
Gráfica de los resultados
ggscatter(datos, x = "age", y = "growth_rate",
add = "reg.line",
conf.int = TRUE,
add.params = list(color = "black",
fill = "lightgray")
)+
stat_cor(method = "pearson", label.x =270, label.y = 2.5)
library(Hmisc)
library(tables)
stderr <- function(x) sd(x)/sqrt(length(x))
latex(
object = tabular((Species+1) ~ All(iris)* PlusMinus(mean, stderr, digits=1), data=iris)
, title = "Test"
, file=""
, size = "small"
, cdot = 3
, here = TRUE
, booktabs=TRUE
, center="centering"
)
library(formattable)
datost<- read.csv("/home/kechu/Documentos/Curso_R/tabla.csv")
formattable(datost,
align = c("l",rep("r", NCOL(datost) - 1)),
list(`Opciones` = formatter("span", style = ~ style(color = "grey", font.weight = "bold")),
`Niveles` = formatter("span", style = ~ style(color = "grey", font.weight = "bold")),
`Test_T`=color_bar("#B1CBEB"),
`Anova` =color_bar("#FA614B"),
`Regresión`=color_bar("#71CA97"),
`Ancova` =color_bar("#3E7DCC")))
| Tipo | Niveles | Test_T | Anova | Regresión | Ancova |
|---|---|---|---|---|---|
| 2 | x | ||||
| Modelo | 3,4,5…. | x | |||
| continúa | x | ||||
| Factor+continúa | x | ||||
| Independencia | x | x | x | x | |
| Supuestos | Normalidad | x | x | x | x |
| Homocedasticidad | x(*) | x(*) | x | x | |
| Línealidad | x | x |
(*) Hay comandos alternativos fáciles para resolver el incumplimiento del supuesto
library(gtsummary)
table1 <-
datos16 %>%
tbl_summary(include = c(growth_rate, type_g, edad_g))
print(table1)
table2 <-
tbl_summary(
datos16,
include = c(growth_rate,standard_length, body_depth, age),
by = type_g, # split table by group
missing = "no" # don't list missing data separately
) %>%
add_n() %>% # add column with total number of non-missing observations
add_p() %>% # test for a difference between groups
modify_header(label = "**Variable**") %>% # update the column header
bold_labels()
print(table2)
mod1 <- lm(growth_rate ~ body_depth + age + type_g, datos16)
t1 <- tbl_regression(mod1)
print(t1)
p1 <- datos16 %>%
ggplot(aes(age, growth_rate, size = standard_length, color = edad_g)) +
geom_point() +
labs(x="Tasa de crecimeinto", y="Edad (días)", title="Parametros corporales")+
theme_cowplot()
p1
p1_animation<- p1 +
transition_time(year) +
labs(subtitle = "Year")
p1_animation
p1_animation <- p1 +
transition_time(year) +
labs(subtitle = "Year") +
shadow_wake(wake_length = 0.1, alpha = FALSE)
p1_animation
p1_animation_2 <- p1 +
geom_text(aes(x = min(age), y = min(growth_rate), label = as.factor(year)) ,
hjust=-1.5, vjust = -0.2, alpha = 0.2, col = "gray", size = 20) +
transition_states(as.factor(year), state_length = 0)
p1_animation_2
p <- ggplot(datos16, aes(age, growth_rate, color = edad_g)) +
geom_point(aes(size = standard_length, frame = year)) +
labs(x="Tasa de crecimeinto", y="Edad (días)", title="Parametros corporales")+
theme_cowplot()
ggplotly(p)