## Biostatistics - Basic Concepts ##

# Setting up the workspace ----
# Go to Session - Set Working Directory - Choose Directory... - RGO5860
# You can use the website https://www.statmethods.net/ as a guide to find
# R code.

##############
# Week 1 quiz #
##############

# Loading the data ----
# The file is semicolon-separated with a header row.
# NOTE(review): if decimals in the file use commas, read.csv2() may be the
# right reader - confirm against the data file.
week1 <- read.csv("week1_data.csv", sep = ";", header = TRUE)

# This database contains data from 130 women with ovarian cancer.
# There are 9 variables:
#   id       = patient id
#   age      = age at diagnosis
#   preg     = number of pregnancies
#   del      = number of deliveries
#   misc     = number of miscarriages
#   surgery  = type of surgery
#   stage    = tumor staging
#   survival = patient survival in months
#   type     = tumor type

# Exploratory data analysis ----
# To view the database structure use the following command:
View(week1)

# Go to the week 1 quiz and answer questions 1-4.

# Write commands to calculate the mean of the patients' age and the median
# of the number of pregnancies, and answer question 5.
mean(week1$age)
median(week1$preg)

# Write the command to answer quiz #6 and #7.

# Graphics ----
# Write the command to draw a graph to verify the relationship between the
# number of pregnancies and the number of deliveries, and answer quiz #8.
plot(week1$preg, week1$del)

# Write a command to draw a graph to compare the age between tumor types,
# and answer quiz #9.
boxplot(age ~ type, data = week1)

# Write the code to draw graphs that allow you to evaluate the frequency
# distribution of the variables age and survival, and answer quiz #10.
hist(week1$age)
hist(week1$survival)

##############
# Week 2 quiz #
##############

week2 <- read.csv("week2_data.csv", sep = ";", header = TRUE)

# In the week2_data file, the variable CA125b is the value of CA 125 before
# the treatment and CA125a the value after the treatment. Is there any
# association between the treatment and the CA 125 value? Write down the set
# of codes you have used to answer this question.
# Paired test: both measurements come from the same patient.
t.test(week2$CA125b, week2$CA125a, paired = TRUE)

# In the week2_data file, is there any association between the type of tumor
# and age? Write down the set of codes you have used to answer this question.
t.test(age ~ type, data = week2)

# In the week2_data file, is there any association between the type of tumor
# and surgery? Write down the set of codes you have used to answer this
# question.
surgery_by_type <- table(week2$surgery, week2$type)
chisq.test(surgery_by_type)

##############
# Week 3 quiz #
##############

# If the mean CA 125 value in the normal population is 35, using data from
# week 2, what is the probability of H0 = "CA 125 in ovarian cancer patients
# is not different from CA 125 in normal women" being true?
# Write down the code to answer quiz #8 and quiz #9.
# One-sample t-test of the pre-treatment values against the reference mean.
t.test(week2$CA125b, mu = 35)

##############
# Week 4 quiz #
##############

# Quiz #6
# Based on the graph assessment, what is the distribution of the variable
# "week2$CA125a"?
hist(week2$CA125a)

# Quiz #7
# Based on the Shapiro-Wilk normality test, what is the probability of
# H0 = true for the variable "week2$CA125a"?
shapiro.test(week2$CA125a)

# Quiz #9
# To test H0 = "There is no difference in CA125 values (week2$CA125a) between
# type I and type II tumors", which is the most appropriate test?

# CA125a values split by tumor type, reused by the steps below.
ca125a_type1 <- week2$CA125a[week2$type == "I"]
ca125a_type2 <- week2$CA125a[week2$type == "II"]

# Describe the results
summary(ca125a_type1)
summary(ca125a_type2)
boxplot(CA125a ~ type, data = week2)

# Test the normality of the dependent variable in both groups
hist(ca125a_type1)
hist(ca125a_type2)
shapiro.test(ca125a_type1)
shapiro.test(ca125a_type2)

# Then choose the test

# Quiz #10
# What is the conclusion of the test from quiz #9?
wilcox.test(CA125a ~ type, data = week2)