H. Chase Stevens Logo

Visualizing k-means clustering (using d3.js)

posted on: Friday, March 22, 2013 by

Having learned a little about d3.js at a hackathon I attended, I decided to put it to good use. Below is an interactive implementation of the k-means clustering algorithm, as visualized using d3.js. Click to place points. Centroids are (initially) set on randomly chosen points, and the number of clusters is set by a supposed rule of thumb. You can choose to step through the algorithm one centroid-readjustment/simultaneous point assignment at a time, or view the process in real time.

Download


//config values:

var colors = ['coral','green','steelblue','seagreen','yellow','cyan','salmon','skyblue','pink']

var radius = 8;

var trans_secs = 2;

var update_ms = 150;

var svg_size = {width:600,height:400};



//initialization

var mouse_x;

var mouse_y;

var new_points = false;

var points = new Array();

var centers = new Array();

var algo_started = false;

var svg = d3.select('#kmeans').append('svg')

.attr('width',svg_size.width)

.attr('height',svg_size.height);

canvas = document.getElementById('kmeans');

var stream = false;

var stop_update = false;



//@ http://jsfromhell.com/array/shuffle [v1.0]

function shuffle(o){ //v1.0

    for(var j, x, i = o.length; i; j = parseInt(Math.random() * i), x = o[--i], o[i] = o[j], o[j] = x);

    return o;

};



function safeShuffle(o){

	var x = new Array();

	for(var i = 0; i < o.length; i++){ 

		x[i] = o[i]; 

	}

	return shuffle(x);

}



function addPoint(){

	//add new point to canvas (d3)

	if (algo_started) {return true;}

	var rel_x = mouse_x - canvas.offsetLeft;

	var rel_y = mouse_y - canvas.offsetTop;

	if (points.length >= 1 && (points[points.length-1].x == rel_x)

		&& (points[points.length-1].y == rel_y)){ //making sure double-clicks don't get counted as two points

		return true;

	}

	points.push({x:(rel_x),y:(rel_y),plotted:false,nearest_center:null});

	new_points = true;

}



//tracking mouse:

document.captureEvents(Event.MOUSEMOVE);

document.onmousemove = getMousePosition;

function getMousePosition(mp) {

	mouse_x = mp.pageX;

	mouse_y = mp.pageY;

	return true;

}



function capitalize(string)

{

    return string.charAt(0).toUpperCase() + string.slice(1);

}



function createCenters () {

	//creating centroids

	document.getElementById('create').style.display = 'none';

	document.getElementById('move').style.display = 'block';

	algo_started = true;

	rand_points = safeShuffle(points);

	for (var i = 0; i < Math.round(Math.sqrt(rand_points.length / 2)); i++){

		centers.push({x:rand_points[i].x, y:rand_points[i].y, color:colors[i%colors.length], old_x:null, old_y:null});

	}

	svg.selectAll('.center')

	.data(centers)

	.enter()

	.append('circle')

	.attr('cx', function(d){return d.x;})

	.attr('cy', function(d){return d.y;})

	.attr('r', radius)

	.attr('style', function(d){return 'fill:' + d.color;})

	.attr('class','center')

   	.append("svg:title")

   	.text(function(d){return capitalize(d.color) + ' cluster center'});

}



function moveCenters () {

	//recalculate centroid positions given their assigned points

	for (var i = 0; i < centers.length; i++){

		var center = centers[i];

		center.old_x = center.x;

		center.old_y = center.y;

		var new_x = 0;

		var new_y = 0;

		var point_count = 0;

		for (var j = 0; j < points.length; j++){

			if (points[j].nearest_center == center){

				new_x += points[j].x;

				new_y += points[j].y;

				point_count++;

			}

		}

		center.x = Math.round(new_x / point_count);

		center.y = Math.round(new_y / point_count);

	}

	svg.selectAll('.center')

	.data(centers)

	.transition()

	.ease('linear')

	.duration(trans_secs * 1000)

	.attr('cx', function(d){return d.x;})

	.attr('cy', function(d){return d.y;});



	//check to see if centroids have failed to move any more (i.e. algorithm complete)

	var all_finalized = true;

	var svg_centers = svg.selectAll('.center')[0];

	for (var i = 0; i < centers.length; i++){

		var center = centers[i];

		var svg_center = {x: Math.round(svg_centers[i].cx.baseVal.value),

			y: Math.round(svg_centers[i].cy.baseVal.value)};

		all_finalized &= (center.x == center.old_x 

			&& center.y == center.old_y 

			&& Math.round(center.x) == svg_center.x 

			&& Math.round(center.y) == svg_center.y

			);

	}

	if (all_finalized){

		stream = false;

		document.getElementById('movecenters').disabled = true;

		document.getElementById('togglestream').disabled = true;

		document.getElementById('final').innerHTML = "Finalized!";

		stop_update = true;

	}

}



function eucDist(a,b){

	return Math.sqrt(Math.pow((a.x - b.x),2) + Math.pow((a.y - b.y),2));

}



function updatePoints(){

	//either creates new svg objects based on clicked points,

	//or re-colors points based on current centroid locations

	if (new_points && !algo_started){

		svg.selectAll('.point')

		.data(points)

		.enter()

		.append('circle')

		.attr('cx', function(d){return d.x;})

		.attr('cy', function(d){return d.y;})

		.attr('r', radius)

		.attr('class','point');

		new_points = false;

	}

	if (algo_started){

		for (var i = [0]; i < points.length; i++){

			var point = points[i];

			var center_candidate = null;

			var center_candidate_dist = 99999;

			var svg_centers = svg.selectAll('.center')[0]

			for (var j = 0; j < centers.length; j++){

				var svg_center = svg_centers[j];

				center_dist = eucDist(point,{x:svg_center.cx.baseVal.value,y:svg_center.cy.baseVal.value});

				if (center_dist < center_candidate_dist){

					center_candidate_dist = center_dist;

					center_candidate = centers[j];

				}

			}

			point.nearest_center = center_candidate;

			point.color = 'light' + point.nearest_center.color;

		}

		svg.selectAll('.point')

		.data(points)

		.attr('style', function(d){return 'fill:' + d.color;});

	}

}



function updateDisplay(){

	document.getElementById('numpoints').innerHTML = points.length;

	document.getElementById('numclusters').innerHTML = Math.round(Math.sqrt(points.length / 2));

}



function toggleStream(){

	stream ^= true;

}



window.setInterval(update,update_ms);

function update(){

	if (stop_update){return true;}

	updatePoints();

	updateDisplay();

	if (stream) { moveCenters(); }

}