DBSCAN works by finding core points, i.e. points that have many data points within a given radius. Once a core point is found, its cluster is grown iteratively until no more core points can be reached within the maximum radius. This algorithm does exceptionally well compared to k-means when there is noise present in the data.
# Example: separating two Gaussian blobs from a uniform noise background
# with DBSCAN, then plotting the generated groups next to the groups that
# DBSCAN recovers. The figure is written to ex411.png and ex411.pdf.
import numpy as np
import matplotlib.pyplot as mpl
from sklearn.cluster import DBSCAN

# Creating data: two Gaussian blobs, one centred at (5, 5) and one at (0, 0).
c1 = np.random.randn(100, 2) + 5
c2 = np.random.randn(50, 2)

# Creating a uniformly distributed background
u1 = np.random.uniform(low=-10, high=10, size=100)
u2 = np.random.uniform(low=-10, high=10, size=100)
c3 = np.column_stack([u1, u2])

# Pooling all the data into one 250 x 2 array (100 + 50 + 100 rows)
data = np.vstack([c1, c2, c3])

# Calculating the clusters with DBSCAN. eps and min_samples are
# hyper-parameters of the estimator, so they are passed to the constructor;
# fit() only takes the data. db.labels_ is an array with one cluster
# identifier per row of data.
db = DBSCAN(eps=0.95, min_samples=10).fit(data)
labels = db.labels_

# Retrieving coordinates for the points of each identified cluster.
# With the settings above the two blobs are expected to come out as
# clusters 0 and 1 (the input is random, so this is not guaranteed);
# points that DBSCAN treats as noise are labelled -1.
dbc1 = data[labels == 0]
dbc2 = data[labels == 1]
noise = data[labels == -1]

# Setting up plot details
x1, x2 = -12, 12
y1, y2 = -12, 12
# One colour per group, in the order (first blob, second blob, background).
group_colors = ('#00CC00', '#028E9B', '#FF7800')


def _draw_panel(ax, groups, title):
    """Scatter each point group on ``ax`` and apply the shared panel layout.

    ``groups`` is a sequence of (n, 2) coordinate arrays; they are drawn in
    order using ``group_colors``. ``title`` is written in the top-left
    corner of the panel.
    """
    for points, color in zip(groups, group_colors):
        ax.scatter(points[:, 0], points[:, 1], lw=0.1, color=color,
                   marker=".")
    # Axis ticks carry no information here; both panels share fixed limits
    # so they can be compared visually.
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    ax.set_xlim(x1, x2)
    ax.set_ylim(y1, y2)
    ax.text(-11, 10, title)


fig = mpl.figure()
fig.subplots_adjust(hspace=0.1, wspace=0.1)

# Left panel: the data coloured by how it was generated.
ax1 = fig.add_subplot(121, aspect='equal')
_draw_panel(ax1, (c1, c2, c3), 'Original')

# Right panel: the same data coloured by the DBSCAN assignment.
ax2 = fig.add_subplot(122, aspect='equal')
_draw_panel(ax2, (dbc1, dbc2, noise), 'DBSCAN identified')

fig.savefig("ex411.png", dpi=100, bbox_inches='tight')
fig.savefig('ex411.pdf', bbox_inches='tight')
Recent Comments