library("readxl")
library("ggplot2") #Best plots
library("ggrepel") #Avoid overlapping labels
library("dplyr")

#set working directory where all files are located
getwd()
setwd("D:\\projects\\ex89")
getwd()
df <- read_excel("data.xlsx")

i=3
k_array = c(2,4,6)
label_array = c("DC","SC","PC")

k <- k_array[i]
label <- label_array[i]

df = df[c(1,k,k+1)]
colnames(df) <- c("gene","log_fc","pvalue")
df <- df[- grep("NA", df$gene),]


# Convert directly in the aes()
p <- ggplot(data=df, aes(x=log_fc, y=-log10(pvalue))) + geom_point()

# Add more simple "theme"
p <- ggplot(data=df, aes(x=log_fc, y=-log10(pvalue))) +
geom_point() + theme_minimal()

# Add vertical lines for log2FoldChange thresholds,
# and one horizontal line for the p-value threshold 
p2 <- p + geom_vline(xintercept=c(-0.6, 0.6), col="red") +
    geom_hline(yintercept=-log10(0.05), col="red")

# add a column of NAs
df$diffexpressed <- "NO"
# if log2Foldchange > 0.6 and pvalue < 0.05, set as "UP" 
df$diffexpressed[df$log_fc > 0.6 & df$pvalue < 0.05] <- "UP"
# if log_fc < -0.6 and pvalue < 0.05, set as "DOWN"
df$diffexpressed[df$log_fc < -0.6 & df$pvalue < 0.05] <- "DOWN"

# Re-plot but this time color the points with "diffexpressed"
p <- ggplot(data=df, aes(x=log_fc, y=-log10(pvalue), col=diffexpressed)) +
geom_point() + theme_minimal() + ggtitle(label)

# Add lines as before...
p2 <- p + geom_vline(xintercept=c(-0.6, 0.6), col="red") +
geom_hline(yintercept=-log10(0.05), col="red")

## Change point color 
# 1. by default, it is assigned to the categories in an alphabetical order):
p3 <- p2 + scale_color_manual(values=c("blue", "grey", "red"))

# 2. to automate a bit: ceate a named vector: the values are the colors to
# be used, the names are the categories they will be assigned to:
mycolors <- c("blue", "red", "grey")
names(mycolors) <- c("DOWN", "UP", "NO")
p3 <- p2 + scale_colour_manual(values = mycolors)

cutoff=sort(df$pvalue)[20] #the 20th smallest value of res$padj
best = subset(df, df$pvalue <= cutoff)

mustHaves <- c("S100A9","S100A8","CCR5","RARG")
interest = df[with(df, gene %in% mustHaves),]

p4 <- p3 + geom_point(data=interest,col="black")
p5 <- p4+geom_text_repel(data=best, aes(label=gene)) + 
geom_text_repel(data=interest, aes(label=gene), fontface = 'bold',col="black") +
ylim(0,4.6)

#In case you want to easily save to disk
filename = paste("Volcanoplot_", label, ".jpeg",  sep="")
ggsave(filename, device="jpeg")

library(xlsx) #load the package
filename = paste("Best_", label, ".xlsx",  sep="")
write.xlsx(x = best, file = filename, sheetName = "TestSheet")

filename = paste("Interest_", label, ".xlsx",  sep="")
write.xlsx(x = interest, file = filename,sheetName = "TestSheet")