This section walks you through some basic analyses that you can run in R on the data you have collected in the field.

Install the following packages (only needed the first time) and load them in R:

install.packages(c("readxl", "dplyr", "ggplot2"))
# Load the packages
library(readxl)    # For reading Excel files
library(dplyr)     # For data manipulation
library(ggplot2)   # For plotting

Loading your data:

# Import the field data from an Excel workbook (.xlsx)
data <- read_excel(path = "path_to_your_file.xlsx")

# Inspect the data: column types, per-column summaries, first few rows
str(data)
summary(data)
head(data)

# Count the missing values in each column
data %>%
  is.na() %>%
  colSums()

Basic cleaning:

# Keep only the rows that have no missing values in any column.
# complete.cases() is base R, so it works with the packages loaded above
# (drop_na() belongs to tidyr, which this script does not load).
clean_data <- data[complete.cases(data), ]

# Convert site name to a factor so it is handled as a categorical variable
clean_data$Site <- as.factor(clean_data$Site)

# Rename columns for clarity (if needed); the syntax is new_name = old_name.
# Backticks are required around names that contain spaces or symbols.
clean_data <- clean_data %>%
  rename(
    SoilMoisture = `Soil Moisture (%)`,
    Temp = Temperature
  )

Normality Test:

# For example, test whether soil moisture is normally distributed.
# Use the cleaned data: the column is called `Soil Moisture (%)` in the raw
# data and `SoilMoisture` after cleaning, so `data$Soil_Moisture` would be
# NULL and the test would fail.
# A p-value below 0.05 suggests the values deviate from a normal distribution.
shapiro.test(clean_data$SoilMoisture)

Visualisation:

# Histogram of soil moisture, drawn from the cleaned data
# (the column is named `SoilMoisture` after the renaming step)
hist(clean_data$SoilMoisture,
     main = "Histogram of Soil Moisture",
     xlab = "Soil Moisture (%)",
     col = "lightblue",
     border = "black")
     
# Scatter plot (base R) of soil moisture against temperature, drawn from the
# cleaned data (columns are named `Temp` and `SoilMoisture` after renaming)
plot(clean_data$Temp, clean_data$SoilMoisture,
     main = "Soil Moisture vs Temperature",
     xlab = "Temperature (°C)",
     ylab = "Soil Moisture (%)",
     pch = 19,
     col = "forestgreen")

# The same scatter plot, built with ggplot2:
# temperature on the x-axis, soil moisture on the y-axis
ggplot(data = clean_data, mapping = aes(Temp, SoilMoisture)) +
  geom_point(size = 2, color = "forestgreen") +
  ggtitle("Soil Moisture vs Temperature") +
  xlab("Temperature (°C)") +
  ylab("Soil Moisture (%)") +
  theme_minimal()