#########################L03-Hypothesis Testing#########################

# Let's Practice 01: one-sample t-test

# Sample of ten observations
sample <- c(22, 25, 28, 32, 27, 30, 35, 38, 31, 29)

# Reference value: the hypothesised mean the sample mean is tested against.
# No significance level is set here, so t.test() runs with its defaults.
reference_value <- 30

# Run the one-sample t-test and display it
# (the outer parentheses print the value that was just assigned)
(result_test <- t.test(x = sample, mu = reference_value))

# Interpret your results

# Let's Practice 02: one-sample t-test computed by hand

yields <- c(3.5, 4.3, 3.3, 5.7, 4.4, 3.3, 4.6, 4.8, 4.7, 3.8)

mean(yields)
var(yields)
sd(yields)

# Hypothesised mean and sample size, stated once so that the manual
# calculation and the t.test() call below cannot drift apart
mu_null <- 4
n_yields <- length(yields)

# t statistic by equation: (sample mean - mu_null) / standard error of the mean
numerator <- mean(yields) - mu_null
numerator
denominator <- sd(yields) / sqrt(n_yields)
denominator

t_stat <- numerator / denominator
t_stat

# We can calculate the p-value from the t distribution

?pt
?abs
# abs() returns the absolute value of a number, i.e. its non-negative magnitude

# Two-sided p-value: twice the upper-tail area beyond |t_stat|,
# with n_yields - 1 degrees of freedom
2 * pt(abs(t_stat), df = n_yields - 1, lower.tail = FALSE)

# Or let t.test() do all of the above in a single call
t.test(yields, mu = mu_null)

# Let's Practice 03: two-sample t-test

# Observations for the two groups being compared
group_a <- c(25, 30, 35, 40, 45)
group_b <- c(20, 28, 32, 38, 42)

# Compare the two groups with t.test() and display the result
# (the outer parentheses print the value that was just assigned)
(t_test_result <- t.test(x = group_a, y = group_b))



# Let's Practice 04: two-sample t-test with a pooled variance, by hand

colonies_1 <- c(0.4, 0.7, 0.9, 0.3, 0.9, 0.8, 0.8,
                1.2, 0.3, 1.3, 0.5, 0.3, 0.5, 0.9)

colonies_2 <- c(1.2, 1.0, 0.7, 0.8, 1.6, 1.2, 0.9,
                0.7, 1.0, 0.4, 0.9, 0.8, 1.6, 1.5)

mean(colonies_1)
mean(colonies_2)

var(colonies_1)
var(colonies_2)

# Group sizes, taken from the data instead of being typed in by hand
n_1 <- length(colonies_1)
n_2 <- length(colonies_2)

# Plain average of the two sample variances
mediavariancia <- (var(colonies_1) + var(colonies_2)) / 2
mediavariancia

# Pooled (joint) variance estimate: each sample variance is weighted by its
# degrees of freedom. With equal group sizes, as here, this equals the plain
# average above; with unequal sizes only this weighted form is correct.
(pooled_var <- ((n_1 - 1) * var(colonies_1) + (n_2 - 1) * var(colonies_2)) /
  (n_1 + n_2 - 2))

# t statistic: difference in means over its pooled standard error

(mean_diff <- mean(colonies_1) - mean(colonies_2))

(t_stat <- mean_diff / sqrt(pooled_var * (1 / n_1 + 1 / n_2)))

# Assess its two-sided p-value with n_1 + n_2 - 2 degrees of freedom

2 * pt(abs(t_stat), df = n_1 + n_2 - 2, lower.tail = FALSE)

# Cross-check against the built-in equal-variance t-test
t.test(colonies_1, colonies_2, var.equal = TRUE)


# Paired samples t-test: repeated experiments

# Create data frame: an identifier column plus two height columns of equal
# length (presumably two measurements of the same ten plants; confirm with
# the data source)
data <- data.frame(
  Number = 1:10,
  height_plants1 = c(120, 122, 118, 125, 130, 128, 123, 126, 119, 121),
  height_plants2 = c(115, 121, 117, 124, 129, 126, 122, 125, 118, 120)
)

# Visualize the data
print(data)

# Perform the t-test for paired samples
result_test <- t.test(data$height_plants1, data$height_plants2, paired = TRUE)

# Print the object that was actually assigned above
print(result_test)

# Histogram of the within-pair differences, the quantity the paired test
# is computed on (a histogram of a single t statistic carries no information)
height_diff <- data$height_plants1 - data$height_plants2
hist(height_diff)



# False positives
# Each curve plots 1 - (1 - alpha)^x for x from 0 to 1000: the chance of at
# least one false positive across x independent tests run at level alpha.
# Each call draws its own plot.

# alpha = 0.01
curve(1-(1-0.01)^x, xlim = c(0,1000))
# alpha = 0.05
curve(1-(1-0.05)^x, xlim = c(0,1000))

# False Discovery Rate (FDR)

# Raw p-values
# (the outer parentheses print the value that was just assigned)
(p_values <- c(0.02, 0.03, 0.05, 0.07, 0.1, 0.01))

# The same p-values arranged from smallest to largest
(p_values_order <- p_values[order(p_values)])

# Benjamini-Hochberg ("BH") adjustment applied to the ordered p-values
(fdr_asjusted <- p.adjust(p = p_values_order, method = "BH"))