{"id":579,"date":"2020-06-15T01:48:58","date_gmt":"2020-06-15T08:48:58","guid":{"rendered":"http:\/\/gantovnik.com\/bio-tips\/?p=579"},"modified":"2020-06-15T01:49:22","modified_gmt":"2020-06-15T08:49:22","slug":"84","status":"publish","type":"post","link":"https:\/\/gantovnik.com\/bio-tips\/2020\/06\/84\/","title":{"rendered":"#84 Volcano plot for gene expression using R"},"content":{"rendered":"<p><img data-recalc-dims=\"1\" decoding=\"async\" src=\"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2020\/06\/Volcanoplot_DC.jpeg?resize=1080%2C1078\" alt=\"\" width=\"1080\" height=\"1078\" class=\"alignnone size-full wp-image-580\" srcset=\"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2020\/06\/Volcanoplot_DC.jpeg?w=2100&amp;ssl=1 2100w, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2020\/06\/Volcanoplot_DC.jpeg?resize=300%2C300&amp;ssl=1 300w, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2020\/06\/Volcanoplot_DC.jpeg?resize=1024%2C1022&amp;ssl=1 1024w, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2020\/06\/Volcanoplot_DC.jpeg?resize=150%2C150&amp;ssl=1 150w, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2020\/06\/Volcanoplot_DC.jpeg?resize=768%2C767&amp;ssl=1 768w, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2020\/06\/Volcanoplot_DC.jpeg?resize=1536%2C1533&amp;ssl=1 1536w, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2020\/06\/Volcanoplot_DC.jpeg?resize=2048%2C2044&amp;ssl=1 2048w, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2020\/06\/Volcanoplot_DC.jpeg?resize=1200%2C1198&amp;ssl=1 1200w\" sizes=\"(max-width: 1080px) 100vw, 1080px\" \/><\/p>\n<pre class=\"brush: python; title: ; notranslate\" title=\"\">\r\nlibrary(&quot;readxl&quot;)\r\nlibrary(&quot;ggplot2&quot;) #Best plots\r\nlibrary(&quot;ggrepel&quot;) #Avoid overlapping 
labels\r\nlibrary(&quot;dplyr&quot;)\r\n\r\n#set working directory where all files are located\r\ngetwd()\r\nsetwd(&quot;D:\\\\projects\\\\ex89&quot;)\r\ngetwd()\r\ndf &lt;- read_excel(&quot;data.xlsx&quot;)\r\n\r\ni=3\r\nk_array = c(2,4,6)\r\nlabel_array = c(&quot;DC&quot;,&quot;SC&quot;,&quot;PC&quot;)\r\n\r\nk &lt;- k_array&#x5B;i]\r\nlabel &lt;- label_array&#x5B;i]\r\n\r\ndf = df&#x5B;c(1,k,k+1)]\r\ncolnames(df) &lt;- c(&quot;gene&quot;,&quot;log_fc&quot;,&quot;pvalue&quot;)\r\ndf &lt;- df&#x5B;- grep(&quot;NA&quot;, df$gene),]\r\n\r\n\r\n# Convert directly in the aes()\r\np &lt;- ggplot(data=df, aes(x=log_fc, y=-log10(pvalue))) + geom_point()\r\n\r\n# Add a simpler &quot;theme&quot;\r\np &lt;- ggplot(data=df, aes(x=log_fc, y=-log10(pvalue))) +\r\ngeom_point() + theme_minimal()\r\n\r\n# Add vertical lines for log2FoldChange thresholds,\r\n# and one horizontal line for the p-value threshold \r\np2 &lt;- p + geom_vline(xintercept=c(-0.6, 0.6), col=&quot;red&quot;) +\r\n    geom_hline(yintercept=-log10(0.05), col=&quot;red&quot;)\r\n\r\n# add a column that defaults to &quot;NO&quot; (not differentially expressed)\r\ndf$diffexpressed &lt;- &quot;NO&quot;\r\n# if log_fc &gt; 0.6 and pvalue &lt; 0.05, set as &quot;UP&quot; \r\ndf$diffexpressed&#x5B;df$log_fc &gt; 0.6 &amp; df$pvalue &lt; 0.05] &lt;- &quot;UP&quot;\r\n# if log_fc &lt; -0.6 and pvalue &lt; 0.05, set as &quot;DOWN&quot;\r\ndf$diffexpressed&#x5B;df$log_fc &lt; -0.6 &amp; df$pvalue &lt; 0.05] &lt;- &quot;DOWN&quot;\r\n\r\n# Re-plot but this time color the points with &quot;diffexpressed&quot;\r\np &lt;- ggplot(data=df, aes(x=log_fc, y=-log10(pvalue), col=diffexpressed)) +\r\ngeom_point() + theme_minimal() + ggtitle(label)\r\n\r\n# Add lines as before...\r\np2 &lt;- p + geom_vline(xintercept=c(-0.6, 0.6), col=&quot;red&quot;) +\r\ngeom_hline(yintercept=-log10(0.05), col=&quot;red&quot;)\r\n\r\n## Change point color \r\n# 1. 
by default, it is assigned to the categories in alphabetical order:\r\np3 &lt;- p2 + scale_color_manual(values=c(&quot;blue&quot;, &quot;grey&quot;, &quot;red&quot;))\r\n\r\n# 2. to automate a bit: create a named vector: the values are the colors to\r\n# be used, the names are the categories they will be assigned to:\r\nmycolors &lt;- c(&quot;blue&quot;, &quot;red&quot;, &quot;grey&quot;)\r\nnames(mycolors) &lt;- c(&quot;DOWN&quot;, &quot;UP&quot;, &quot;NO&quot;)\r\np3 &lt;- p2 + scale_colour_manual(values = mycolors)\r\n\r\ncutoff=sort(df$pvalue)&#x5B;20] #the 20th smallest value of df$pvalue\r\nbest = subset(df, df$pvalue &lt;= cutoff)\r\n\r\nmustHaves &lt;- c(&quot;S100A9&quot;,&quot;S100A8&quot;,&quot;CCR5&quot;,&quot;RARG&quot;)\r\ninterest = df&#x5B;with(df, gene %in% mustHaves),]\r\n\r\np4 &lt;- p3 + geom_point(data=interest,col=&quot;black&quot;)\r\np5 &lt;- p4+geom_text_repel(data=best, aes(label=gene)) + \r\ngeom_text_repel(data=interest, aes(label=gene), fontface = 'bold',col=&quot;black&quot;) +\r\nylim(0,4.6)\r\n\r\n#In case you want to easily save to disk\r\nfilename = paste(&quot;Volcanoplot_&quot;, label, &quot;.jpeg&quot;,  sep=&quot;&quot;)\r\nggsave(filename, device=&quot;jpeg&quot;)\r\n\r\nlibrary(xlsx) #load the package\r\nfilename = paste(&quot;Best_&quot;, label, &quot;.xlsx&quot;,  sep=&quot;&quot;)\r\nwrite.xlsx(x = best, file = filename, sheetName = &quot;TestSheet&quot;)\r\n\r\nfilename = paste(&quot;Interest_&quot;, label, &quot;.xlsx&quot;,  sep=&quot;&quot;)\r\nwrite.xlsx(x = interest, file = filename,sheetName = &quot;TestSheet&quot;)\r\n<\/pre>\n","protected":false},"excerpt":{"rendered":"<p>library(&quot;readxl&quot;) library(&quot;ggplot2&quot;) #Best plots library(&quot;ggrepel&quot;) #Avoid overlapping labels library(&quot;dplyr&quot;) #set working directory where all files are located getwd() setwd(&quot;D:\\\\projects\\\\ex89&quot;) getwd() df &lt;- read_excel(&quot;data.xlsx&quot;) i=3 k_array = c(2,4,6) label_array = 
c(&quot;DC&quot;,&quot;SC&quot;,&quot;PC&quot;) k &lt;- k_array&#x5B;i] label &lt;- label_array&#x5B;i] df = df&#x5B;c(1,k,k+1)] colnames(df) &lt;- c(&quot;gene&quot;,&quot;log_fc&quot;,&quot;pvalue&quot;) df &lt;- df&#x5B;- grep(&quot;NA&quot;, df$gene),] # Convert directly in the aes() p &lt;- [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"nf_dc_page":"","_et_pb_use_builder":"","_et_pb_old_content":"","_et_gb_content_width":"","_lmt_disableupdate":"yes","_lmt_disable":"","jetpack_post_was_ever_published":false,"_jetpack_newsletter_access":"","_jetpack_dont_email_post_to_subs":false,"_jetpack_newsletter_tier_id":0,"_jetpack_memberships_contains_paywalled_content":false,"_jetpack_memberships_contains_paid_content":false,"footnotes":""},"categories":[30,29],"tags":[],"class_list":["post-579","post","type-post","status-publish","format-standard","hentry","category-bioinformatics","category-r"],"modified_by":"gantovnik","jetpack_featured_media_url":"","jetpack_sharing_enabled":true,"jetpack_shortlink":"https:\/\/wp.me\/s8bH0k-84","jetpack_likes_enabled":true,"jetpack-related-posts":[{"id":1912,"url":"https:\/\/gantovnik.com\/bio-tips\/2023\/08\/372-tsa-data-number-of-passengers-at-usa-airport-checkpoints\/","url_meta":{"origin":579,"position":0},"title":"#372 TSA data: Number of passengers at USA airport checkpoints","author":"gantovnik","date":"2023-08-09","format":false,"excerpt":"Good news for the aerospace industry because the number of passengers passing through TSA checkpoints this year is the same as in 2019 (before COVID-19). 
[code language=\"python\"] import pandas as pd import numpy as np import math import matplotlib.pyplot as plt from plotnine import ggplot, geom_point, aes, stat_smooth, facet_wrap,geom_smooth,labs, scale_x_continuous,geom_line\u2026","rel":"","context":"In &quot;aerospace&quot;","block_context":{"text":"aerospace","link":"https:\/\/gantovnik.com\/bio-tips\/category\/aerospace\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2023\/08\/tsa_2023_08.png?resize=350%2C200&ssl=1","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2023\/08\/tsa_2023_08.png?resize=350%2C200&ssl=1 1x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2023\/08\/tsa_2023_08.png?resize=525%2C300&ssl=1 1.5x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2023\/08\/tsa_2023_08.png?resize=700%2C400&ssl=1 2x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2023\/08\/tsa_2023_08.png?resize=1050%2C600&ssl=1 3x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2023\/08\/tsa_2023_08.png?resize=1400%2C800&ssl=1 4x"},"classes":[]},{"id":1452,"url":"https:\/\/gantovnik.com\/bio-tips\/2022\/05\/210-parametric-curve-in-3d-2-2-2-2-2-2-2-2-2-2-2-2-2-3-2-2-2-2-2-2-2-2-2-3-2-2-2-2\/","url_meta":{"origin":579,"position":1},"title":"#280 TSA data plot using R","author":"gantovnik","date":"2022-05-03","format":false,"excerpt":"Number of passengers at USA Airport TSA checkpoints for 2019, 2020, 2021 and 2022 [code language=\"python\"] install.packages('xml2') install.packages('rvest') install.packages('tidyverse') install.packages('janitor') #install.packages('themeani') #install.packages(\"ggplot2\") #install.packages(\"magrittr\") #install.packages(\"dplyr\") library('xml2') library(rvest) library(tidyverse) library(ggplot2) library(dplyr) library('xml2') library(rvest) library(tidyverse) 
#read_html(\"https:\/\/www.tsa.gov\/coronavirus\/passenger-throughput?page=0\") -> myurl #html_table(myurl, header = TRUE, fill = TRUE) -> tsa #working directory setwd(\"D:\/projects\/wordpress\/ex280_tsa\") tsa <- read.table(\"data_tsa.txt\",\u2026","rel":"","context":"In &quot;R&quot;","block_context":{"text":"R","link":"https:\/\/gantovnik.com\/bio-tips\/category\/r\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2022\/05\/tsa_2022_05-1.png?resize=350%2C200&ssl=1","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2022\/05\/tsa_2022_05-1.png?resize=350%2C200&ssl=1 1x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2022\/05\/tsa_2022_05-1.png?resize=525%2C300&ssl=1 1.5x"},"classes":[]},{"id":2158,"url":"https:\/\/gantovnik.com\/bio-tips\/2024\/05\/421-plot-a-pandas-data-frame-for-loops-on-columns\/","url_meta":{"origin":579,"position":2},"title":"#421 Plot a pandas data frame for loops on columns","author":"gantovnik","date":"2024-05-04","format":false,"excerpt":"[code language=\"python\"] import numpy as np import pandas as pd import matplotlib.pyplot as plt np.random.seed(2023) df = pd.DataFrame({ \"x\": np.linspace(0, 1, 10), \"A\": np.random.rand(10), \"B\": np.random.rand(10), \"C\": np.random.rand(10), }) for column in df: if column != 'x': plt.plot(df['x'], df[column],label=f\"x vs. 
{column}\") plt.scatter(df['x'], df[column]) plt.title(\"Performance\") plt.grid() plt.xlabel('x') plt.ylabel('Efficiency') plt.legend() plt.show() plt.savefig('ex421.png',\u2026","rel":"","context":"In &quot;matplotlib&quot;","block_context":{"text":"matplotlib","link":"https:\/\/gantovnik.com\/bio-tips\/category\/matplotlib\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2024\/05\/ex421.png?resize=350%2C200&ssl=1","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2024\/05\/ex421.png?resize=350%2C200&ssl=1 1x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2024\/05\/ex421.png?resize=525%2C300&ssl=1 1.5x"},"classes":[]},{"id":654,"url":"https:\/\/gantovnik.com\/bio-tips\/2020\/09\/107-tsa-data-2020-vs-2019\/","url_meta":{"origin":579,"position":3},"title":"#107: TSA Data 2020 vs 2019","author":"gantovnik","date":"2020-09-28","format":false,"excerpt":"#107: TSA Data 2020 vs 2019 The TSA has started to publish the daily volume of passengers going through checkpoints on its website. The data set also includes the numbers from 2019 in order to measure the impact of travel as a result of COVID-19. 
https:\/\/www.tsa.gov\/coronavirus\/passenger-throughput [code language=\"python\"] from bs4\u2026","rel":"","context":"In &quot;python&quot;","block_context":{"text":"python","link":"https:\/\/gantovnik.com\/bio-tips\/category\/python\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2020\/09\/ex107.png?resize=350%2C200","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2020\/09\/ex107.png?resize=350%2C200 1x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2020\/09\/ex107.png?resize=525%2C300 1.5x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2020\/09\/ex107.png?resize=700%2C400 2x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2020\/09\/ex107.png?resize=1050%2C600 3x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2020\/09\/ex107.png?resize=1400%2C800 4x"},"classes":[]},{"id":1037,"url":"https:\/\/gantovnik.com\/bio-tips\/2021\/11\/185-creating-a-scatter-plot-using-seaborn\/","url_meta":{"origin":579,"position":4},"title":"#185 Creating a scatter plot using seaborn","author":"gantovnik","date":"2021-11-07","format":false,"excerpt":"[code language=\"python\"] import matplotlib.pyplot as plt import numpy as np import pandas as pd import os import seaborn as sns os.chdir(r'D:\\projects\\wordpress\\ex185') os.getcwd() url = \"https:\/\/archive.ics.uci.edu\/ml\/machine-learning-databases\/auto-mpg\/auto-mpg.data\" df = pd.read_csv(url) column_names = ['mpg', 'cylinders', 'displacement', 'horsepower', 'weight', 'acceleration', 'year', 'origin', 'name'] df = pd.read_csv(url, names= column_names, delim_whitespace=True) df.loc[df.horsepower == '?', 'horsepower'] =\u2026","rel":"","context":"In 
&quot;python&quot;","block_context":{"text":"python","link":"https:\/\/gantovnik.com\/bio-tips\/category\/python\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex185b.jpg?resize=350%2C200&ssl=1","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex185b.jpg?resize=350%2C200&ssl=1 1x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex185b.jpg?resize=525%2C300&ssl=1 1.5x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex185b.jpg?resize=700%2C400&ssl=1 2x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex185b.jpg?resize=1050%2C600&ssl=1 3x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex185b.jpg?resize=1400%2C800&ssl=1 4x"},"classes":[]},{"id":1943,"url":"https:\/\/gantovnik.com\/bio-tips\/2023\/08\/385-scatterplot-matrix-using-seaborn-library\/","url_meta":{"origin":579,"position":5},"title":"#385 Scatterplot matrix using seaborn library","author":"gantovnik","date":"2023-08-13","format":false,"excerpt":"[code language=\"python\"] #Scatterplot matrix using seaborn library import matplotlib.pyplot as plt #conda install -c anaconda seaborn import seaborn as sns sns.set_theme(style=\"ticks\") def main(): df = sns.load_dataset(\"penguins\") sns.pairplot(df, hue=\"species\") plt.savefig(\"ex385.png\", dpi=100) plt.show() if __name__ == '__main__': main() [\/code]","rel":"","context":"In &quot;plot&quot;","block_context":{"text":"plot","link":"https:\/\/gantovnik.com\/bio-tips\/category\/plot\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2023\/08\/ex385.png?resize=350%2C200&ssl=1","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2023\/08\/ex385.png?resize=350%2C200&ssl=1 1x, 
https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2023\/08\/ex385.png?resize=525%2C300&ssl=1 1.5x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2023\/08\/ex385.png?resize=700%2C400&ssl=1 2x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2023\/08\/ex385.png?resize=1050%2C600&ssl=1 3x"},"classes":[]}],"_links":{"self":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/posts\/579","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/comments?post=579"}],"version-history":[{"count":0,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/posts\/579\/revisions"}],"wp:attachment":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/media?parent=579"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/categories?post=579"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/tags?post=579"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}