{"id":387,"date":"2019-02-13T00:11:36","date_gmt":"2019-02-13T08:11:36","guid":{"rendered":"https:\/\/gantovnik.com\/bio-tips\/?p=387"},"modified":"2021-11-12T15:01:38","modified_gmt":"2021-11-12T23:01:38","slug":"pca-principal-component-analysis","status":"publish","type":"post","link":"https:\/\/gantovnik.com\/bio-tips\/2019\/02\/pca-principal-component-analysis\/","title":{"rendered":"#56 PCA &#8211; Principal Component Analysis"},"content":{"rendered":"<pre class=\"brush: python; title: ; notranslate\" title=\"\">\r\nimport os\r\nimport matplotlib.pyplot as plt\r\nimport numpy as np\r\nimport seaborn as sns\r\nsns.set()\r\n#from mpl_toolkits.mplot3d import Axes3D\r\nos.chdir(r'D:\\projects\\wordpress\\ex56')\r\nos.getcwd()\r\nrng = np.random.RandomState()\r\nX = np.dot(rng.rand(2, 2), rng.randn(2, 200)).T\r\nplt.scatter(X&#x5B;:, 0], X&#x5B;:, 1])\r\nplt.axis('equal')\r\n\r\nfrom sklearn.decomposition import PCA\r\npca = PCA(n_components=2)\r\npca.fit(X)\r\nprint(pca.components_)\r\nprint(pca.explained_variance_)\r\n\r\n\r\ndef draw_vector(v0, v1, ax=None):\r\n    ax = ax or plt.gca()\r\n    arrowprops=dict(arrowstyle='-&gt;',color='blue',linewidth=2,shrinkA=0, shrinkB=0)\r\n    ax.annotate('', v1, v0, arrowprops=arrowprops)\r\n    \r\nplt.scatter(X&#x5B;:, 0], X&#x5B;:, 1], c='red', alpha=0.3)\r\nfor length, vector in zip(pca.explained_variance_, pca.components_):\r\n    v = vector * 3 * np.sqrt(length)\r\n    draw_vector(pca.mean_, pca.mean_ + v)\r\n\r\nplt.axis('equal');\r\n\r\nplt.savefig(&quot;example56.png&quot;, dpi=100)\r\nplt.show()\r\nplt.close()\r\n\r\npca = PCA(n_components=1)\r\npca.fit(X)\r\nX_pca = pca.transform(X)\r\nprint(&quot;original shape: &quot;, X.shape)\r\nprint(&quot;transformed shape:&quot;, X_pca.shape)\r\n\r\nX_new = pca.inverse_transform(X_pca)\r\nplt.scatter(X&#x5B;:, 0], X&#x5B;:, 1], alpha=0.2)\r\nplt.scatter(X_new&#x5B;:, 0], X_new&#x5B;:, 1], c='red',alpha=0.2)\r\nplt.axis('equal')\r\nplt.savefig(&quot;example56_1.png&quot;, dpi=100)\r\nplt.show()\r\nplt.close()\r\n<\/pre>\n<p><img data-recalc-dims=\"1\" decoding=\"async\" src=\"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/02\/example65.png?resize=600%2C400&#038;ssl=1\" alt=\"example65\" width=\"600\" height=\"400\" class=\"alignnone size-full wp-image-388\" srcset=\"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/02\/example65.png?w=600&amp;ssl=1 600w, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/02\/example65.png?resize=300%2C200&amp;ssl=1 300w\" sizes=\"(max-width: 600px) 100vw, 600px\" \/><\/p>\n<p><img data-recalc-dims=\"1\" decoding=\"async\" src=\"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/02\/example65_1.png?resize=600%2C400&#038;ssl=1\" alt=\"example65_1\" width=\"600\" height=\"400\" class=\"alignnone size-full wp-image-389\" srcset=\"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/02\/example65_1.png?w=600&amp;ssl=1 600w, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/02\/example65_1.png?resize=300%2C200&amp;ssl=1 300w\" sizes=\"(max-width: 600px) 100vw, 600px\" \/><\/p>\n","protected":false},"excerpt":{"rendered":"<p>import os import matplotlib.pyplot as plt import numpy as np import seaborn as sns sns.set() #from mpl_toolkits.mplot3d import Axes3D os.chdir(r&#8217;D:\\projects\\wordpress\\ex56&#8242;) os.getcwd() rng = np.random.RandomState() X = np.dot(rng.rand(2, 2), rng.randn(2, 200)).T plt.scatter(X&#x5B;:, 0], X&#x5B;:, 1]) plt.axis(&#8216;equal&#8217;) from sklearn.decomposition import PCA pca = PCA(n_components=2) pca.fit(X) print(pca.components_) print(pca.explained_variance_) def draw_vector(v0, v1, ax=None): ax = ax or plt.gca() arrowprops=dict(arrowstyle=&#8217;-&gt;&#8217;,color=&#8217;blue&#8217;,linewidth=2,shrinkA=0, [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"nf_dc_page":"","_et_pb_use_builder":"","_et_pb_old_content":"","_et_gb_content_width":"","_lmt_disableupdate":"yes","_lmt_disable":"","jetpack_post_was_ever_published":false,"_jetpack_newsletter_access":"","_jetpack_dont_email_post_to_subs":false,"_jetpack_newsletter_tier_id":0,"_jetpack_memberships_contains_paywalled_content":false,"_jetpack_memberships_contains_paid_content":false,"footnotes":""},"categories":[2],"tags":[],"class_list":["post-387","post","type-post","status-publish","format-standard","hentry","category-python"],"modified_by":"gantovnik","jetpack_featured_media_url":"","jetpack_sharing_enabled":true,"jetpack_shortlink":"https:\/\/wp.me\/p8bH0k-6f","jetpack_likes_enabled":true,"jetpack-related-posts":[{"id":654,"url":"https:\/\/gantovnik.com\/bio-tips\/2020\/09\/107-tsa-data-2020-vs-2019\/","url_meta":{"origin":387,"position":0},"title":"#107: TSA Data 2020 vs 2019","author":"gantovnik","date":"2020-09-28","format":false,"excerpt":"#107: TSA Data 2020 vs 2019 The TSA has started to publish the daily volume of passengers going through checkpoints on its website. The data set also includes the numbers from 2019 in order to measure the impact of travel as a result of COVID-19. https:\/\/www.tsa.gov\/coronavirus\/passenger-throughput [code language=\"python\"] from bs4\u2026","rel":"","context":"In &quot;python&quot;","block_context":{"text":"python","link":"https:\/\/gantovnik.com\/bio-tips\/category\/python\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2020\/09\/ex107.png?resize=350%2C200","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2020\/09\/ex107.png?resize=350%2C200 1x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2020\/09\/ex107.png?resize=525%2C300 1.5x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2020\/09\/ex107.png?resize=700%2C400 2x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2020\/09\/ex107.png?resize=1050%2C600 3x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2020\/09\/ex107.png?resize=1400%2C800 4x"},"classes":[]},{"id":383,"url":"https:\/\/gantovnik.com\/bio-tips\/2019\/02\/annotating-text\/","url_meta":{"origin":387,"position":1},"title":"#55 Annotating text in matplotlib plot","author":"gantovnik","date":"2019-02-12","format":false,"excerpt":"[code language=\"python\"] import os import matplotlib.pyplot as plt import numpy as np os.chdir(r'D:\\projects\\wordpress\\ex55') os.getcwd() fig = plt.figure() ax = fig.add_subplot(111) t = np.arange(0.0, 5.0, 0.01) s = np.cos(2*np.pi*t) line, = ax.plot(t, s, lw=2) ax.annotate('local max', xy=(2, 1), xytext=(3, 1.5), arrowprops=dict(facecolor='red', shrink=0.05), ) ax.set_ylim(-2,2) plt.savefig(\"example55.png\", dpi=100) plt.show() fig = plt.figure() ax\u2026","rel":"","context":"In &quot;python&quot;","block_context":{"text":"python","link":"https:\/\/gantovnik.com\/bio-tips\/category\/python\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/02\/example64_1.png?resize=350%2C200&ssl=1","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/02\/example64_1.png?resize=350%2C200&ssl=1 1x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/02\/example64_1.png?resize=525%2C300&ssl=1 1.5x"},"classes":[]},{"id":96,"url":"https:\/\/gantovnik.com\/bio-tips\/2018\/12\/bisection-method\/","url_meta":{"origin":387,"position":2},"title":"#16 Bisection method using python","author":"gantovnik","date":"2018-12-31","format":false,"excerpt":"[code language=\"python\"] import os import matplotlib.pyplot as plt import numpy as np os.chdir('\/home\/vg\/Downloads\/projects\/ex16') os.getcwd() f = lambda x: np.exp(x)-2 tol=0.1 a,b=-2,2 x=np.linspace(-2.1,2.1,1000) fig,ax=plt.subplots(1,1,figsize=(12,4)) ax.plot(x,f(x),lw=1.5) ax.axhline(0,ls=':',color='k') ax.set_xticks([-2,-1,0,1,2]) ax.set_xlabel(r'$x$',fontsize=18) ax.set_ylabel(r'$f(x)$',fontsize=18) fa,fb=f(a),f(b) ax.plot(a,fa,'ko') ax.plot(b,fb,'ko') ax.text(a,fa+0.5,r\"$a$\",ha='center',fontsize=18) ax.text(b,fb+0.5,r\"$b$\",ha='center',fontsize=18) n=1 while b-a &gt; tol: m=a+(b-a)\/2 fm=f(m) ax.plot(m,fm,'ko') ax.text(m,fm-0.5,r\"$m_%d$\" % n,ha='center') n=n+1 if (np.sign(fa)==np.sign(fm)): a,fa=m,fm else: b,fb=m,fm\u2026","rel":"","context":"In &quot;optimization&quot;","block_context":{"text":"optimization","link":"https:\/\/gantovnik.com\/bio-tips\/category\/optimization\/"},"img":{"alt_text":"example16","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2018\/12\/example16.png?resize=350%2C200","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2018\/12\/example16.png?resize=350%2C200 1x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2018\/12\/example16.png?resize=525%2C300 1.5x"},"classes":[]},{"id":1179,"url":"https:\/\/gantovnik.com\/bio-tips\/2021\/11\/204-mandelbrot-fractal-using-python-2\/","url_meta":{"origin":387,"position":3},"title":"#205 Plots with annotation in matplotlib","author":"gantovnik","date":"2021-11-26","format":false,"excerpt":"[code language=\"python\"] import numpy as np import matplotlib.pyplot as plt plt.fig = plt.figure(1) plt.ax = plt.subplot(111) x = np.linspace(0,2*np.pi,100) # Function that modulates the amplitude of the sin function amod_sin = lambda x: (1.-0.1*np.sin(25*x))*np.sin(x) plt.ax.plot(x,np.sin(x),label = 'sin') plt.ax.plot(x, amod_sin(x), label = 'modsin') annot1=plt.ax.annotate('amplitude modulated\\n curve', (2.1,1.0), (3.2,0.5), arrowprops={'width':2,'color':'k','connectionstyle':'arc3,rad=+0.5','shrink':0.05},verticalalignment='bottom', horizontalalignment='left', fontsize=10,\u2026","rel":"","context":"In &quot;python&quot;","block_context":{"text":"python","link":"https:\/\/gantovnik.com\/bio-tips\/category\/python\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex205.png?resize=350%2C200&ssl=1","width":350,"height":200},"classes":[]},{"id":1221,"url":"https:\/\/gantovnik.com\/bio-tips\/2021\/11\/210-parametric-curve-in-3d-2-2-2-2-2-2-2\/","url_meta":{"origin":387,"position":4},"title":"#217 Annotation in matplotlib","author":"gantovnik","date":"2021-11-28","format":false,"excerpt":"[code language=\"python\"] import numpy as np import matplotlib.pyplot as plt from matplotlib.ticker import AutoMinorLocator, MultipleLocator np.random.seed(19680801) X = np.linspace(0.5, 3.5, 100) Y1 = 3+np.cos(X) Y2 = 1+np.cos(1+X\/0.75)\/2 Y3 = np.random.uniform(Y1, Y2, len(X)) fig = plt.figure(figsize=(8, 8)) ax = fig.add_subplot(1, 1, 1, aspect=1) def minor_tick(x, pos): if not x % 1.0:\u2026","rel":"","context":"In &quot;python&quot;","block_context":{"text":"python","link":"https:\/\/gantovnik.com\/bio-tips\/category\/python\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex217-1.png?resize=350%2C200&ssl=1","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex217-1.png?resize=350%2C200&ssl=1 1x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex217-1.png?resize=525%2C300&ssl=1 1.5x"},"classes":[]},{"id":2800,"url":"https:\/\/gantovnik.com\/bio-tips\/2024\/07\/437-particle-swarm-optimization-using-python\/","url_meta":{"origin":387,"position":5},"title":"#437 Particle swarm optimization using python","author":"gantovnik","date":"2024-07-20","format":false,"excerpt":"import numpy as np import matplotlib.pyplot as plt from matplotlib.animation import FuncAnimation def f(x,y): return (x-3.14)**2 + (y-2.72)**2 + np.sin(3*x+1.41) + np.sin(4*y-1.73) # Compute and plot the function in 3D within [0,5]x[0,5] x, y = np.array(np.meshgrid(np.linspace(0,5,100), np.linspace(0,5,100))) z = f(x, y) # Find the global minimum x_min = x.ravel()[z.argmin()] y_min\u2026","rel":"","context":"In &quot;animation&quot;","block_context":{"text":"animation","link":"https:\/\/gantovnik.com\/bio-tips\/category\/animation\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2024\/07\/ex437.gif?resize=350%2C200&ssl=1","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2024\/07\/ex437.gif?resize=350%2C200&ssl=1 1x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2024\/07\/ex437.gif?resize=525%2C300&ssl=1 1.5x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2024\/07\/ex437.gif?resize=700%2C400&ssl=1 2x"},"classes":[]}],"_links":{"self":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/posts\/387","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/comments?post=387"}],"version-history":[{"count":0,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/posts\/387\/revisions"}],"wp:attachment":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/media?parent=387"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/categories?post=387"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/tags?post=387"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}