{"id":362,"date":"2019-01-29T00:39:24","date_gmt":"2019-01-29T08:39:24","guid":{"rendered":"https:\/\/gantovnik.com\/bio-tips\/?p=362"},"modified":"2024-12-17T09:15:28","modified_gmt":"2024-12-17T17:15:28","slug":"clustering-2","status":"publish","type":"post","link":"https:\/\/gantovnik.com\/bio-tips\/2019\/01\/clustering-2\/","title":{"rendered":"#50 Clustering using Python"},"content":{"rendered":"<p><img data-recalc-dims=\"1\" decoding=\"async\" class=\"alignnone size-full wp-image-363\" src=\"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/01\/example60.png?resize=600%2C400&#038;ssl=1\" alt=\"example60\" width=\"600\" height=\"400\" srcset=\"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/01\/example60.png?w=600&amp;ssl=1 600w, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/01\/example60.png?resize=300%2C200&amp;ssl=1 300w\" sizes=\"(max-width: 600px) 100vw, 600px\" \/><\/p>\n<pre class=\"lang:python decode:true \">import os\nimport matplotlib.pyplot as plt\nimport numpy as np\nfrom scipy.stats import norm\nfrom numpy import array,vstack\nfrom scipy.cluster.vq import vq,kmeans\n#from mpl_toolkits.mplot3d import Axes3D\nos.chdir(r'D:\\projects\\wordpress\\ex50')\nos.getcwd()\n# generate data\ndata=norm.rvs(0,0.3,size=(10000,2))\ninside_ball=np.hypot(data[:,0],data[:,1])&lt;1.0\ndata=data[inside_ball]\ndata = vstack((data, data+array([1,1]),data+array([-1,1])))\n#request the data to be separated into three clusters\ncentroids, distortion = kmeans(data,3)\ncluster_assignment, distances = vq(data,centroids)\nplt.plot(data[cluster_assignment==0,0], data[cluster_assignment==0,1], 'r.')\nplt.plot(data[cluster_assignment==1,0], data[cluster_assignment==1,1], 'g.')\nplt.plot(data[cluster_assignment==2,0], data[cluster_assignment==2,1], 'b.')\nplt.savefig(\"example50.png\", dpi=100)\nplt.show()\nplt.close()<\/pre>\n<p>&nbsp;<\/p>\n","protected":false},"excerpt":{"rendered":"<p>import os import matplotlib.pyplot as plt import numpy as np from scipy.stats import norm from numpy import array,vstack from scipy.cluster.vq import vq,kmeans #from mpl_toolkits.mplot3d import Axes3D os.chdir(r&#8217;D:\\projects\\wordpress\\ex50&#8242;) os.getcwd() # generate data data=norm.rvs(0,0.3,size=(10000,2)) inside_ball=np.hypot(data[:,0],data[:,1])&lt;1.0 data=data[inside_ball] data = vstack((data, data+array([1,1]),data+array([-1,1]))) #request the data to be separated into three clusters centroids, distortion = kmeans(data,3) cluster_assignment, distances = vq(data,centroids) [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"nf_dc_page":"","_et_pb_use_builder":"","_et_pb_old_content":"","_et_gb_content_width":"","_lmt_disableupdate":"yes","_lmt_disable":"","jetpack_post_was_ever_published":false,"_jetpack_newsletter_access":"","_jetpack_dont_email_post_to_subs":false,"_jetpack_newsletter_tier_id":0,"_jetpack_memberships_contains_paywalled_content":false,"_jetpack_memberships_contains_paid_content":false,"footnotes":""},"categories":[11,2],"tags":[14,10],"class_list":["post-362","post","type-post","status-publish","format-standard","hentry","category-machine-learning","category-python","tag-clustering","tag-machine-learning"],"modified_by":"gantovnik","jetpack_featured_media_url":"","jetpack_sharing_enabled":true,"jetpack_shortlink":"https:\/\/wp.me\/p8bH0k-5Q","jetpack_likes_enabled":true,"jetpack-related-posts":[{"id":5,"url":"https:\/\/gantovnik.com\/bio-tips\/2016\/12\/example-1-interpolation\/","url_meta":{"origin":362,"position":0},"title":"Example 1: Interpolation","author":"gantovnik","date":"2016-12-08","format":false,"excerpt":"Example 1: Interpolation","rel":"","context":"In &quot;matplotlib&quot;","block_context":{"text":"matplotlib","link":"https:\/\/gantovnik.com\/bio-tips\/category\/matplotlib\/"},"img":{"alt_text":"example1","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2016\/12\/example1-300x200.png?resize=350%2C200","width":350,"height":200},"classes":[]},{"id":449,"url":"https:\/\/gantovnik.com\/bio-tips\/2019\/08\/smoothing-the-noise-in-real-world-data\/","url_meta":{"origin":362,"position":1},"title":"#65 Smoothing the Noise in Real-world Data","author":"gantovnik","date":"2019-08-22","format":false,"excerpt":"#Smoothing the noise in real-world data #This window rolls over the data and is used to compute the average over that window. import matplotlib.pyplot as plt import numpy as np import os os.chdir(r'D:\\projects\\wordpress\\ex65') def moving_average(interval, window_size): #Compute convoluted window for given size window = np.ones(int(window_size)) \/ float(window_size) return np.convolve(interval, window,\u2026","rel":"","context":"In &quot;python&quot;","block_context":{"text":"python","link":"https:\/\/gantovnik.com\/bio-tips\/category\/python\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/08\/ex65.png?resize=350%2C200&ssl=1","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/08\/ex65.png?resize=350%2C200&ssl=1 1x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/08\/ex65.png?resize=525%2C300&ssl=1 1.5x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/08\/ex65.png?resize=700%2C400&ssl=1 2x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/08\/ex65.png?resize=1050%2C600&ssl=1 3x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2019\/08\/ex65.png?resize=1400%2C800&ssl=1 4x"},"classes":[]},{"id":2158,"url":"https:\/\/gantovnik.com\/bio-tips\/2024\/05\/421-plot-a-pandas-data-frame-for-loops-on-columns\/","url_meta":{"origin":362,"position":2},"title":"#421 Plot a pandas data frame for loops on columns","author":"gantovnik","date":"2024-05-04","format":false,"excerpt":"","rel":"","context":"In &quot;matplotlib&quot;","block_context":{"text":"matplotlib","link":"https:\/\/gantovnik.com\/bio-tips\/category\/matplotlib\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2024\/05\/ex421.png?resize=350%2C200&ssl=1","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2024\/05\/ex421.png?resize=350%2C200&ssl=1 1x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2024\/05\/ex421.png?resize=525%2C300&ssl=1 1.5x"},"classes":[]},{"id":2104,"url":"https:\/\/gantovnik.com\/bio-tips\/2024\/01\/411-clustering-using-dbscan-algorithm-in-sklearn-cluster-in-python\/","url_meta":{"origin":362,"position":3},"title":"#411 Clustering using DBSCAN algorithm in sklearn.cluster in python","author":"gantovnik","date":"2024-01-18","format":false,"excerpt":"DBSCAN works by finding core points that have many data points within a given radius. Once the core is defined, the process is iteratively computed until there are no more core points definable within the maximum radius. This algorithm does exceptionally well compared to kmeans where there is noise present\u2026","rel":"","context":"In &quot;cluster&quot;","block_context":{"text":"cluster","link":"https:\/\/gantovnik.com\/bio-tips\/category\/cluster\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2024\/01\/ex411.png?resize=350%2C200&ssl=1","width":350,"height":200},"classes":[]},{"id":1109,"url":"https:\/\/gantovnik.com\/bio-tips\/2021\/11\/193-animation-using-python\/","url_meta":{"origin":362,"position":4},"title":"#193 Animation using python","author":"gantovnik","date":"2021-11-19","format":false,"excerpt":"","rel":"","context":"In &quot;python&quot;","block_context":{"text":"python","link":"https:\/\/gantovnik.com\/bio-tips\/category\/python\/"},"img":{"alt_text":"","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex193.png?resize=350%2C200&ssl=1","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex193.png?resize=350%2C200&ssl=1 1x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex193.png?resize=525%2C300&ssl=1 1.5x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2021\/11\/ex193.png?resize=700%2C400&ssl=1 2x"},"classes":[]},{"id":65,"url":"https:\/\/gantovnik.com\/bio-tips\/2018\/12\/linear-regression\/","url_meta":{"origin":362,"position":5},"title":"Linear regression","author":"gantovnik","date":"2018-12-24","format":false,"excerpt":"import os import matplotlib.pyplot as plt import numpy as np os.chdir('\/home\/vg\/Downloads\/projects\/ex9') os.getcwd() plt.figure(figsize=(10,8)) N = 100 start = 0 end = 1 A = np.random.rand() + 1 B = np.random.rand() x = np.linspace(start,end,N) y = A * x + B y = y + np.random.randn(N)\/10 p = np.polyfit(x,y,1) plt.plot(x,y,'o',label='Given data:\u2026","rel":"","context":"In &quot;python&quot;","block_context":{"text":"python","link":"https:\/\/gantovnik.com\/bio-tips\/category\/python\/"},"img":{"alt_text":"example9","src":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2018\/12\/example9.png?resize=350%2C200","width":350,"height":200,"srcset":"https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2018\/12\/example9.png?resize=350%2C200 1x, https:\/\/i0.wp.com\/gantovnik.com\/bio-tips\/wp-content\/uploads\/2018\/12\/example9.png?resize=525%2C300 1.5x"},"classes":[]}],"_links":{"self":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/posts\/362","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/comments?post=362"}],"version-history":[{"count":2,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/posts\/362\/revisions"}],"predecessor-version":[{"id":6764,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/posts\/362\/revisions\/6764"}],"wp:attachment":[{"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/media?parent=362"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/categories?post=362"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/gantovnik.com\/bio-tips\/wp-json\/wp\/v2\/tags?post=362"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}