#############################################################################
#                          DESCRIPTIVE STATISTICS                           #
#############################################################################

#### Set the working directory ####
# Shortcut -> ctrl + shift + h
# Giving the path explicitly -> setwd("C:/Users/Downloads")
# Automatically switch to the directory where this script is saved.
# This only works inside RStudio with a saved document, so it is guarded:
# anywhere else the working directory is left untouched and must be set
# by hand (see the explicit setwd() form above).
if (requireNamespace("rstudioapi", quietly = TRUE) &&
    rstudioapi::isAvailable()) {
  caminho_script <- rstudioapi::getActiveDocumentContext()$path
  # An unsaved document reports an empty path; setwd("") would fail.
  if (nzchar(caminho_script)) {
    setwd(dirname(caminho_script))
  }
}
# check the current directory -> getwd()

#############################################################################
#### Reading the data ####
# The data file must be in the working directory chosen above
# (in the example, the folder C:/Users/Downloads).
# Read the data from an external file; decimals use a comma in the file.
dados <- read.table("dadosProgramaR.txt", dec = ",", header = TRUE)
# Another way to read the data (pick the file interactively):
# dados <- read.table(file = file.choose(), dec = ",", header = TRUE)

# Put the columns of `dados` on the search path so that the variables of
# the data set can be accessed simply by name.
attach(dados)
# Note: not recommended when working with more than one data set in the
# same R file.
# NOTE: the bare column names used below (EsT_Civ, G_Int, R_proced, Salario,
# N_filhos) are expected to be on the search path, e.g. via attach(dados).

# List the variable names in `dados`
names(dados)
# Show the first six rows of the data set
head(dados)

#############################################################################
############################# QUALITATIVE DATA ##############################

#### Frequency tables for qualitative data ####
table(EsT_Civ)
table(G_Int)
table(EsT_Civ, G_Int)
table(EsT_Civ, G_Int, R_proced)

#### Proportion tables ####
prop.table(table(EsT_Civ))
prop.table(table(EsT_Civ, G_Int))
# Proportions within each row
prop.table(table(EsT_Civ, G_Int), 1)
# Proportions within each column
prop.table(table(EsT_Civ, G_Int), 2)

#### Bar chart ####
barplot(table(G_Int))

## customising ##
# NOTE(review): the bar heights are counts from table(), while the title and
# y label say "Proporcao" -- confirm which one is intended.
barplot(table(EsT_Civ),
        col = c("green", "red"),
        ylim = c(0, 25),
        space = 0.8,
        width = c(0.2, 0.2),
        main = "Proporcao de funcionario por estado civil",
        xlab = "Estado Civil",
        ylab = "Proporcao de funcionarios")
# Add the percentages: locator() waits for two mouse clicks on the plot and
# the labels are drawn at the clicked positions (interactive sessions only).
text(locator(n = 2), c("56%", "44%"))

# or, with the percentages shown as a legend
barplot(table(EsT_Civ),
        ylim = c(0, 25),
        space = 0.8,
        width = c(0.2, 0.2),
        col = c("green", "red"),
        legend.text = c("56%", "44%"),
        main = "Proporcao de funcionario por estado civil",
        xlab = "Estado Civil",
        ylab = "Proporcao de funcionarios")

#### Pie chart ####
pie(table(EsT_Civ),
    col = c("red", "yellow"),
    labels = c("Casado (56%)", "Solteiro (44%)"),
    main = "Distribuicao dos Funcionarios por Estado Civil")

# The three shares must add up to 100%: 33,3 + 50,0 + 16,7
# (the last one was previously mistyped as 15,7).
pie(table(G_Int),
    col = c("red", "yellow", "green"),
    labels = c("1 Grau (33,3%)", "2 Grau (50,0%)", "Superior (16,7%)"))
title(main = "Distribuicao dos Funcionarios por Grau de Instrucao")

#### Pareto chart ####
# Install qcc only when it is missing, then load it (library() fails loudly
# if the installation did not succeed).
if (!requireNamespace("qcc", quietly = TRUE)) {
  install.packages("qcc", dependencies = TRUE)
}
library(qcc)
pareto.chart(table(EsT_Civ))
pareto.chart(table(G_Int))

#############################################################################
############################ QUANTITATIVE DATA ##############################

#### Measures of location and dispersion ####
# (these are computed on quantitative variables)
# Mean
mean(Salario)
# Variance
var(Salario)
# Standard deviation
sd(Salario)
# Median
median(Salario)
# Quantiles
quantile(Salario, 0.9)
# Quartiles
quantile(Salario, c(0.25, 0.5, 0.75))
# Descriptive summary of every variable in the data set
summary(dados)

#### Function that computes the measures of location and dispersion ####
# meuResumo(x): summary of a numeric vector.
#   x: numeric vector, may contain NA.
# Returns a named numeric vector with
#   "n"             length of x (missing values included),
#   "media"         mean, ignoring NA,
#   "mediana"       median, ignoring NA,
#   "desvio Padrao" standard deviation, ignoring NA,
#   "variancia"     variance, ignoring NA,
#   "NA"            number of missing values in x.
meuResumo <- function(x) {
  medidas <- c(length(x),
               mean(x, na.rm = TRUE),
               median(x, na.rm = TRUE),
               sd(x, na.rm = TRUE),
               var(x, na.rm = TRUE),
               sum(is.na(x)))
  rotulos <- c("n", "media", "mediana", "desvio Padrao", "variancia", "NA")
  setNames(medidas, rotulos)
}
meuResumo(Salario)

#### Plot for discrete data ####
plot(table(N_filhos),
     xlab = "Numero de filhos",
     ylab = "Numero de Funcionarios")
hist(N_filhos)

#### Dot plot ####
stripchart(Salario, xlab = "Salario (em s.m.)", pch = 20, method = "stack")
abline(h = 0.98)
# Mark the mean with a red triangle just below the baseline
points(mean(Salario), 0.93, pch = 17, col = "red", cex = 2)

#### Histogram ####
# Class limits and class labels shared by the histograms and the frequency
# tables below: left-closed classes 4|-8, 8|-12, ..., 20|-24.
quebras_salario <- seq(4, 24, by = 4)
classes_salario <- c("4|-8", "8|-12", "12|-16", "16|-20", "20|-24")
# Hard-coded percentage of observations in each class, drawn above the bars.
# NOTE(review): keep these in sync with the `fr` column of `saida` below.
rotulos_pct <- c("27,8%", "33,3%", "22,2%", "13,8%", "2,8%")

# Density histogram. right = FALSE keeps the classes left-closed, matching
# the second histogram and the frequency tables, so the bars agree with the
# percentage labels even when a salary falls exactly on a class limit.
hist(Salario,
     breaks = quebras_salario,
     right = FALSE,
     xlab = "Salario",
     ylab = "Densidade",
     probability = TRUE,
     col = "yellow",
     ylim = c(0, 0.1),
     main = "Dist. de salario dos funcionarios da empresa",
     labels = rotulos_pct)

# or: frequency histogram with hand-made axes
h <- hist(Salario,
          breaks = quebras_salario,
          xlab = "Salario",
          ylab = "Numero de Funcionarios",
          col = "yellow",
          axes = FALSE,
          right = FALSE,
          main = "Dist. de salario dos funcionarios da empresa",
          labels = rotulos_pct)
# Tick marks at the class limits (x) and at the observed class counts (y)
axis(1, h$breaks)
axis(2, h$counts)

#### Frequency table ####
TDF <- hist(Salario, breaks = quebras_salario, right = FALSE, plot = FALSE)
# Absolute frequency
fabs <- TDF$counts
# Relative frequency
fr <- fabs / length(Salario)
# Building the table by hand
saida <- cbind(fabs, fr)
dimnames(saida) <- list(classes_salario, c("f", "fr"))
saida

#### Frequency table using the cut function ####
# include.lowest = TRUE closes the last class on the right, as hist() does,
# so a value equal to the upper limit is counted instead of becoming NA.
fabs <- table(cut(Salario,
                  breaks = quebras_salario,
                  right = FALSE,
                  include.lowest = TRUE,
                  labels = classes_salario))
fr <- fabs / length(Salario)
saida <- cbind(fabs, fr)
dimnames(saida) <- list(classes_salario, c("f", "fr"))
saida

#### Box plots ####
boxplot(Salario, col = "yellow", ylab = "Salario")
boxplot(Salario,
        notch = TRUE,
        xlab = "Salario",
        horizontal = TRUE,
        col = "green")
# Salary by marital status: groups on the x axis, salary on the y axis
boxplot(Salario ~ EsT_Civ,
        xlab = "Estado Civil",
        ylab = "Salario",
        col = c("green", "yellow"))
boxplot(Salario ~ G_Int,
        xlab = "Grau de Instrucao",
        ylab = "Salario",
        names = c("1 Grau", "2 Grau", "Superior"),
        main = "Diagrama dos salarios dos func. por grau de instrucao")

#### Bivariate analysis (quantitative) ####
# Keep only the rows where the number of children is recorded
com_filhos <- complete.cases(N_filhos)
plot(Salario[com_filhos], N_filhos[com_filhos],
     xlab = "Salario",
     ylab = "N de filhos",
     pch = 20)

# Correlation. N_filhos has missing values (see the filter above), so the
# incomplete pairs must be dropped; otherwise cor() returns NA.
cor(Salario, N_filhos, use = "complete.obs")

# Small example of a nearly linear relationship
x <- c(1, 2, 3, 4)
y <- c(2, 4, 6, 8.5)
cor(x, y)
plot(x, y)
# Add a lowess smooth of y on x
lines(lowess(x, y), col = "blue")