DBSCAN works by finding core points, i.e. points that have many neighbouring data points within a given radius. Once a core point is identified, its cluster is grown iteratively until no more core points can be reached within the maximum radius. This algorithm performs exceptionally well compared to k-means when noise is present in the data.

import numpy as np
import matplotlib.pyplot as mpl
from sklearn.cluster import DBSCAN
# Synthetic data set: two Gaussian blobs on top of a uniform background.
# c1: 100 standard-normal points shifted by +5 in both coordinates.
# c2: 50 standard-normal points around the origin.
c1 = 5 + np.random.randn(100, 2)
c2 = np.random.randn(50, 2)
# Background: 100 points whose x and y coordinates are drawn independently
# and uniformly from [-10, 10).
u1, u2 = (np.random.uniform(low=-10, high=10, size=100) for _ in range(2))
c3 = np.column_stack((u1, u2))
# Stack the three components row-wise into a single 250 x 2 array
# (100 + 50 + 100 points), in the order c1, c2, c3.
data = np.concatenate((c1, c2, c3), axis=0)
# Cluster the pooled points with DBSCAN.
# The neighbourhood radius (eps) and the minimum number of neighbours
# required for a core point (min_samples) are estimator settings, so they
# are passed to the constructor; fit() only takes the data. Leaving them
# out would silently fall back to the library defaults and produce a
# different partition than the one this example is tuned for.
db = DBSCAN(eps=0.95, min_samples=10).fit(data)
# db.labels_ holds one integer cluster identifier per row of data.
labels = db.labels_
# Split the points by the cluster they were assigned to. DBSCAN numbers
# clusters from 0 and marks noise with -1. This example expects two
# clusters (0 and 1); points carrying any other label are not extracted.
dbc1 = data[labels == 0]
dbc2 = data[labels == 1]
noise = data[labels == -1]
# Both panels share the same square viewing window.
x1, x2 = -12, 12
y1, y2 = -12, 12
# One colour per group, applied in the same order in both panels.
palette = ('#00CC00', '#028E9B', '#FF7800')

fig = mpl.figure()
fig.subplots_adjust(hspace=0.1, wspace=0.1)

# Left panel: the three components as they were generated.
# Right panel: the partition found by DBSCAN (cluster 0, cluster 1, noise).
panels = (
    (121, (c1, c2, c3), 'Original'),
    (122, (dbc1, dbc2, noise), 'DBSCAN identified'),
)
axes = []
for position, groups, caption in panels:
    ax = fig.add_subplot(position, aspect='equal')
    for points, colour in zip(groups, palette):
        ax.scatter(points[:, 0], points[:, 1], lw=0.1, color=colour, marker=".")
    # Hide ticks and tick labels; only the point clouds matter here.
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    ax.set_xlim(x1, x2)
    ax.set_ylim(y1, y2)
    # Caption placed near the upper-left corner of the panel.
    ax.text(-11, 10, caption)
    axes.append(ax)
ax1, ax2 = axes

# Write the figure out as both a raster and a vector image.
fig.savefig("ex411.png", dpi=100, bbox_inches='tight')
fig.savefig('ex411.pdf', bbox_inches='tight')

Discover more from Tips and Hints for Aerospace Engineers

Subscribe now to keep reading and get access to the full archive.

Continue reading