Profesores: Carlos M. González Alcón - Carlos Pérez González
(Dpto. Matemáticas, Estadística e Investigación Operativa - Universidad de La Laguna)
Librería RCommander y técnicas de análisis estadístico.
# Install R Commander only when it is not already available, so that
# re-running this script does not reinstall the package every time.
if (!requireNamespace("Rcmdr", quietly = TRUE)) {
  install.packages("Rcmdr")
}
# Loading takes a while: it is advisable to install the additional packages
# that Rcmdr asks for.
library(Rcmdr)
# Launch the R Commander GUI.
Commander()
Se crea y con los cuantiles 25%, 50%, 75%, 100% de la normal de media 0 y sd. 1.
Se crea x con los cuantiles 25%, 50%, 75%, 100% de la muestra (se utiliza la función quantile()).
Se representan con plot(x,y); abline(0,1)
* http://cpgonzal.github.io/cursoIntroR/ddt.txt
* http://dl.dropboxusercontent.com/u/17677514/ddt.txt
# Exploratory plots for the `ddt` data frame.
# NOTE(review): `ddt` is not created in this section (the read.table() call
# below is commented out), so it must already exist in the workspace.
# require(devtools)
# ddt <- read.table("ddt.txt", header = TRUE, sep = "", dec = ".")

# Fetch the plotHistBox() helper from a GitHub gist and apply it to `length`.
devtools::source_gist(
  "1e7ee71e79346ffdd5a5",
  filename = "plotHistBox.R"
)
plotHistBox(ddt$length)

# Histogram of all weights; the result is kept in h1 so that the per-species
# histograms below can reuse its breaks.
h1 <- hist(
  ddt$weight,
  col = "grey60",
  main = "Histograma de weight",
  xlab = "weight"
)
par(mfrow = c(1, 1))

# One histogram per species code (1, 2, 3), drawn on the same axes with the
# common breaks taken from h1.
hist(
  ddt$weight[ddt$species == 1],
  breaks = h1$breaks,
  col = "blue",
  main = "Histograma de weight",
  xlab = "weight"
)
hist(
  ddt$weight[ddt$species == 2],
  breaks = h1$breaks,
  col = "red",
  add = TRUE
)
hist(
  ddt$weight[ddt$species == 3],
  breaks = h1$breaks,
  col = "green",
  add = TRUE
)
# NOTE(review): assumes species_name is a factor whose levels, sorted in
# decreasing order, line up with species codes 1, 2, 3 -- confirm.
legend(
  "topright",
  sort(levels(ddt$species_name), decreasing = TRUE),
  fill = c("blue", "red", "green")
)

# Box-and-whisker plot of the DDT concentration.
boxplot(
  ddt$DDT_conc,
  main = "Diagrama box-and-whisker de DDT_conc",
  col = "gray"
)

# Normal Q-Q plot of the DDT concentration with a dashed red reference line.
qqnorm(ddt$DDT_conc, main = "Q-q plot de DDT_conc")
qqline(ddt$DDT_conc, col = "red", lty = "dashed")
# Scatterplots and group means for the `ddt` data frame.
# library() is used instead of require(): a missing package then stops the
# script with a clear error instead of returning FALSE and failing later at
# the first call that needs it.
library(car)
# length against weight, conditioned on `group`, with regression lines fitted
# by group and no smoother or marginal boxplots.
scatterplot(length ~ weight | group,
  regLine = TRUE, smooth = FALSE,
  boxplots = FALSE, by.groups = TRUE, reset.par = FALSE,
  data = ddt
)

library(lattice)
# Same relationship as a lattice plot, points keyed by `group`.
xyplot(length ~ weight,
  groups = group, type = "p", pch = 16,
  auto.key = list(border = TRUE), data = ddt
)

# Pairwise scatterplots of the three numeric variables (car).
scatterplotMatrix(~ DDT_conc + length + weight,
  regLine = TRUE, smooth = FALSE,
  diagonal = TRUE, data = ddt
)

library(Rcmdr)
# Mean DDT concentration per species with 95% confidence-interval error bars.
plotMeans(ddt$DDT_conc, ddt$species_name, error.bars = "conf.int", level = 0.95)
Comparar el peso de los peces del grupo 1 con el peso de los del grupo 2.
# Shapiro-Wilk normality test on the weight column of ddt (all rows pooled).
# The "##" lines below are the recorded console output of the call.
shapiro.test(ddt$weight)
##
## Shapiro-Wilk normality test
##
## data: ddt$weight
## W = 0.9825, p-value = 0.06299
Si hay dudas en la normalidad, el t-test no se puede aplicar y hay que acudir a alternativas no paramétricas.
Comparar el peso de los peces del grupo 1 con el peso de los del grupo 2.
# Bartlett test of homogeneity of variances of weight across the levels of
# `group`. The "##" lines below are the recorded console output of the call.
bartlett.test(weight ~ group, data=ddt)
##
## Bartlett test of homogeneity of variances
##
## data: weight by group
## Bartlett's K-squared = 0.13287, df = 1, p-value = 0.7155
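Si las varianzas entre grupos son iguales, se debe especificar en las opciones del t-test (var.equal=TRUE); en caso contrario se utiliza la aproximación de Welch, que es la opción por defecto de t.test().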
# Compare weight across locations. The "##" lines are recorded console output.

# Check whether the variance of weight is constant across locations.
bartlett.test(weight ~ location_name, data = ddt)
##
## Bartlett test of homogeneity of variances
##
## data: weight by location_name
## Bartlett's K-squared = 0.78832, df = 2, p-value = 0.6742

# Under homoscedasticity a one-way ANOVA can be used.
anova.mod1 <- aov(weight ~ location_name, data = ddt)
summary(anova.mod1)
## Df Sum Sq Mean Sq F value Pr(>F)
## location_name 2 942700 471350 3.438 0.0349 *
## Residuals 141 19332838 137112
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# If constant variance or normality cannot be assumed, fall back to the
# Kruskal-Wallis test.
kruskal.test(weight ~ location_name, data = ddt)
##
## Kruskal-Wallis rank sum test
##
## data: weight by location_name
## Kruskal-Wallis chi-squared = 6.13, df = 2, p-value = 0.04665

# Tukey pairwise comparisons on the ANOVA model fitted above (the model is
# reused rather than refitted with the identical call).
# glht() and mcp() live in multcomp, which nothing in the visible script
# attaches, so it is loaded here explicitly.
library(multcomp)
summary(glht(anova.mod1, linfct = mcp(location_name = "Tukey"))) # pairwise tests
# Simple linear regression of weight on length, followed by the correlation
# matrix of the two variables. The "##" lines are recorded console output.
regmod.1 <- lm(weight~length, data=ddt)
summary(regmod.1)
##
## Call:
## lm(formula = weight ~ length, data = ddt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -989.96 -189.45 -49.51 193.68 923.22
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -483.672 150.497 -3.214 0.00162 **
## length 35.816 3.471 10.319 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 285.7 on 142 degrees of freedom
## Multiple R-squared: 0.4285, Adjusted R-squared: 0.4245
## F-statistic: 106.5 on 1 and 142 DF, p-value: < 2.2e-16
# Correlation between length and weight; "complete" is partially matched to
# use = "complete.obs", i.e. rows containing missing values are dropped.
cor(ddt[,c("length","weight")], use="complete")
## length weight
## length 1.0000000 0.6546113
## weight 0.6546113 1.0000000
Representación gráfica con plot() y abline()
# Regression graphics, a transformed fit and cluster plots for `ddt`.

# Attach devtools before the first source_gist() call; it was previously
# attached only after the first gist had already been requested.
library(devtools)

# Scatterplot of weight against length with the fitted line of regmod.1.
plot(ddt$length, ddt$weight,
  main = "Weight vs. length", xlab = "length", ylab = "weight",
  pch = 20
)
abline(regmod.1, col = "red", lty = "dashed")

# Diagnostic plots of the linear model laid out on a 2x2 grid.
par(mfrow = c(2, 2))
plot(regmod.1)

# Transformed variables: cube of length and square root of weight.
ddt$length.3 <- ddt$length^3
ddt$sqrt.weight <- sqrt(ddt$weight)

# Back to a single panel so the next figure is not squeezed into the 2x2
# grid left over from the diagnostic plots.
par(mfrow = c(1, 1))
source_gist("f9b06d411675c1702ccc", filename = "plotConf.R")
plotConf(sqrt.weight ~ length.3, ddt)

# Two cluster plots side by side.
# NOTE(review): the meaning of plotCluster()'s first argument (1 vs. 2) is
# defined in the gist, not visible here -- confirm.
source_gist("a4b2ed204d01bc12d952", filename = "plotCluster.R")
par(mfrow = c(1, 2))
plotCluster(1, ddt[, c("length", "weight", "DDT_conc")], ddt$species_name)
plotCluster(2, ddt[, c("length", "weight", "DDT_conc")], ddt$species_name)