I toyed around at last weekend's Bitcamp hackathon, playing with some larger data sets and attempting to visualize them. My main effort was focused on extracting data from the 2012 annual survey of the Behavioral Risk Factor Surveillance System (BRFSS). Unfortunately, the CDC distributes the data in a rather obscure format (SAS), which required downloading the StatTransfer data conversion software to convert it to CSV. The complete dataset still hasn't been visualized, but it's off to a start — so far I have visualized the number of responses to the survey from each state:

alt text




Implementation

To parse the CSV file that we extracted with the StatTransfer software, we wrote a simple Python script that loops through the file, counts the data rows belonging to each state, and outputs the counts in JSON format.

import csv
import json

# Numeric state/territory codes found in the first CSV column, mapped to the
# display names used in the JSON output.
# NOTE(review): the numbering looks like FIPS state codes (presumably the
# BRFSS `_STATE` variable) -- confirm against the survey codebook.
STATES = {
    1: 'Alabama',
    2: 'Alaska',
    4: 'Arizona',
    5: 'Arkansas',
    6: 'California',
    8: 'Colorado',
    9: 'Connecticut',
    10: 'Delaware',
    11: 'District of Columbia',
    12: 'Florida',
    13: 'Georgia',
    15: 'Hawaii',
    16: 'Idaho',
    17: 'Illinois',
    18: 'Indiana',
    19: 'Iowa',
    20: 'Kansas',
    21: 'Kentucky',
    22: 'Louisiana',
    23: 'Maine',
    24: 'Maryland',
    25: 'Massachusetts',
    26: 'Michigan',
    27: 'Minnesota',
    28: 'Mississippi',
    29: 'Missouri',
    30: 'Montana',
    31: 'Nebraska',
    32: 'Nevada',
    33: 'New Hampshire',
    34: 'New Jersey',
    35: 'New Mexico',
    36: 'New York',
    37: 'North Carolina',
    38: 'North Dakota',
    39: 'Ohio',
    40: 'Oklahoma',
    41: 'Oregon',
    42: 'Pennsylvania',
    44: 'Rhode Island',
    45: 'South Carolina',
    46: 'South Dakota',
    47: 'Tennessee',
    48: 'Texas',
    49: 'Utah',
    50: 'Vermont',
    51: 'Virginia',
    53: 'Washington',
    54: 'West Virginia',
    55: 'Wisconsin',
    56: 'Wyoming',
    66: 'Guam',
    72: 'Puerto Rico',
}

# A progress line is printed every time this many rows have been read.
PROGRESS_INTERVAL = 1000


def count_responses(csv_path, states=None):
    """Count the data rows of *csv_path* that belong to each known state.

    The first line of the file is treated as a header and skipped.  The
    first column of every following row must hold an integer state code.

    Args:
        csv_path: Path of the CSV file to read.
        states: Mapping of state code -> name; defaults to ``STATES``.

    Returns:
        A dict mapping every code in *states* to its number of rows
        (0 when the file has none for that code).

    Raises:
        ValueError: If the first column of a non-blank row is not an integer.

    Blank rows are ignored.  Rows whose code is not in *states* are left out
    of the result and summarised on stdout instead of aborting the whole run
    with a ``KeyError``.
    """
    states = STATES if states is None else states
    counts = dict.fromkeys(states, 0)
    unknown = {}

    with open(csv_path) as csv_file:
        reader = csv.reader(csv_file)
        # Skip the header; the default keeps an empty file from raising.
        next(reader, None)
        for lines_processed, row in enumerate(reader, 1):
            if row:
                state = int(row[0])
                if state in counts:
                    counts[state] += 1
                else:
                    unknown[state] = unknown.get(state, 0) + 1
            if lines_processed % PROGRESS_INTERVAL == 0:
                print('Processed %d lines' % lines_processed)

    for state, skipped in sorted(unknown.items()):
        print('Skipped %d rows with unknown state code %d' % (skipped, state))
    return counts


def write_json(counts, json_path, states=None):
    """Write the per-state counts to *json_path* and echo them to stdout.

    The file has the shape ``{"States": [{"name": ..., "pop": ...}, ...]}``,
    with one entry per code in *states*, ordered by state code.

    Args:
        counts: Mapping of state code -> row count, as returned by
            ``count_responses``.
        json_path: Path of the JSON file to create or overwrite.
        states: Mapping of state code -> name; defaults to ``STATES``.
    """
    states = STATES if states is None else states
    entries = []
    for state_id in sorted(states):
        state_name = states[state_id]
        print('%d\t%20s:\t%d entries.' % (state_id, state_name, counts[state_id]))
        # "pop" is kept as a string so the file's schema stays what existing
        # consumers were written against.
        entries.append({'name': state_name, 'pop': '%d' % counts[state_id]})

    # Serialise with the json module: the previous hand-built string ended
    # with a trailing comma, which strict JSON parsers reject.
    with open(json_path, 'w') as output_file:
        json.dump({'States': entries}, output_file)


def main():
    """Count the responses in LLCP2012.CSV and write them to output.json."""
    write_json(count_responses('LLCP2012.CSV'), 'output.json')


if __name__ == '__main__':
    main()

Then we used d3.js to visualize it. We used d3's albersUsa() projection to draw the map of the U.S., and a quantize scale to map the response counts to a limited range of colors. The quantize function outputs CSS class names, which we set on the state paths so that the stylesheet applies the matching fill color. Here's the code:

<!DOCTYPE html>
<meta charset="utf-8">

<style>

/* Default look for every SVG path on the page. */
path {
  fill: #ccc;
  stroke: #ccc;
  stroke-linejoin: round;
}

/* NOTE(review): the script shown with this page never assigns a `land`
   class -- this rule may be unused. */
path.land {
  fill: #eee;
  stroke: #eee;
  stroke-linejoin: round;
}


/* NOTE(review): render() puts the `states` class on the <g> wrapper, not on
   the individual paths, so this selector does not match the state shapes as
   written.  Making it match would also out-rank the .qN-5 fills below. */
path.states {
    stroke: #ccc;
    fill: #eee;
    stroke-width: 1.0;
}

/* NOTE(review): no `counties` class is assigned by the script shown with
   this page -- this rule may be unused. */
path.counties {
  stroke: #aaa;
    fill: #eee;
    stroke-width: 1.0;
}

/* Toggled on the tooltip by the mousemove / mouseout handlers. */
.hidden {
  display: none;
}

/* NOTE(review): not referenced by the markup or script shown with this
   page. */
.tooltip_container {
  position: absolute;
  border: 1px solid red;
  height: 20px;
}

/* Floating label appended to #map; its left/top are set inline on every
   mousemove over a state. */
.tooltip {
  background-color: rgba(0, 0, 0, 0.6);
  color: #fff;
  opacity: 0.9;
  padding: 5px;
  text-align: center;
  border-radius: 10px;
  position: absolute;
  visibility: visible;
  border: 1px solid rgba(0, 0, 0, 0.6);
  height: auto;
  width: auto;
  -webkit-transform: translateY(10px);
     -moz-transform: translateY(10px);
      -ms-transform: translateY(10px);
       -o-transform: translateY(10px);
          transform: translateY(10px);
  -webkit-transition: all .25s ease-out;
     -moz-transition: all .25s ease-out;
      -ms-transition: all .25s ease-out;
       -o-transition: all .25s ease-out;
          transition: all .25s ease-out;
  -webkit-box-shadow: 2px 2px 6px rgba(0, 0, 0, 0.28);
     -moz-box-shadow: 2px 2px 6px rgba(0, 0, 0, 0.28);
      -ms-box-shadow: 2px 2px 6px rgba(0, 0, 0, 0.28);
       -o-box-shadow: 2px 2px 6px rgba(0, 0, 0, 0.28);
          box-shadow: 2px 2px 6px rgba(0, 0, 0, 0.28);
}


/* CSS Triangles - see Trevor's post */
/* Zero-size box whose coloured top border forms a downward-pointing arrow
   hanging below the tooltip. */
.tooltip:after {
    border-left: solid transparent 10px;
    border-right: solid transparent 10px;
    border-top: solid rgba(0, 0, 0, 0.6) 10px;
    bottom: -10px;
    content: " ";
    height: 0;
    left: 50%;
    margin-left: -13px;
    position: absolute;
    width: 0;
}


/* Choropleth fills, lightest to darkest.  The class names are the five
   values ("q0-5" .. "q4-5") in the quantize scale's range. */
.q0-5 { fill:rgb(247,251,255); }
.q1-5 { fill:rgb(198,219,239); }
.q2-5 { fill:rgb(107,174,214); }
.q3-5 { fill:rgb(33,113,181); }
.q4-5 { fill:rgb(8,48,107); }

/* Heading text drawn inside the SVG by render(). */
.title {
  fill: #222;
  text-decoration: underline;
  fill-opacity: .5;
  font-size: 20px;
  font-weight: bold;
  text-anchor: middle;
  pointer-events: none;
}

/* Caption text drawn over the middle of the map by render(). */
.overlay {
  fill: #555;
  fill-opacity: .5;
  font-size: 20px;
  font-weight: 500;
  text-anchor: middle;
  pointer-events: none;
}


/* Container <div> for the colour legend appended to #map. */
#legend {
    padding: 1.5em 0 0 1.5em;
}

/* One legend entry; its thick top border is the colour swatch. */
.key {
    border-top-width: 15px;
    border-top-style: solid;
    font-size: .75em;
    width: 10%;
    padding-left: 0;
    padding-right: 0;
    display: inline-block;
}

/* Legend swatch colours: render() appends "-l" to each quantize class name,
   so these mirror the .qN-5 fills above. */
.q0-5-l { border-top-color:rgb(247,251,255); }
.q1-5-l { border-top-color:rgb(198,219,239); }
.q2-5-l { border-top-color:rgb(107,174,214); }
.q3-5-l { border-top-color:rgb(33,113,181); }
.q4-5-l { border-top-color:rgb(8,48,107); }

</style>

<body>

<!-- Third-party libraries.  All are loaded over explicit https:// URLs:
     plain http:// scripts are blocked as mixed content when the page itself
     is served over HTTPS, and protocol-relative (//host/...) URLs do not
     resolve when the page is opened from file://. -->
<script src="https://d3js.org/d3.v3.min.js" charset="utf-8"></script>
<script src="https://d3js.org/topojson.v1.min.js"></script>
<script src="https://d3js.org/queue.v1.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/2.1.0/jquery.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/underscore.js/1.6.0/underscore-min.js"></script>
<script src="https://d3js.org/colorbrewer.v1.min.js"></script>

<!-- Mount point: the script appends the SVG map, tooltip and legend here. -->
<div id="map">
</div>


<script>

    // Pixel dimensions of the (square) SVG canvas.
    var width = 1160;
    var height = 1160;

    // Lookup keyed by state name; filled with each state's `pop` value from
    // output.json once that file has loaded.
    var rateById = d3.map();

    // Splits the 0..25000 domain into five equal buckets and returns the CSS
    // class ("q0-5" .. "q4-5") of the bucket a value falls into.
    var quantize = d3.scale.quantize()
        .range(["q0-5", "q1-5", "q2-5", "q3-5", "q4-5"])
        .domain([0, 25000]);

    // Root <svg> element that render() draws into.
    var svg = d3.select("#map")
        .append("svg")
        .attr("height", height)
        .attr("width", width);


       // Fetch the map topology and the per-state counts in parallel and draw
       // only once BOTH have arrived.  Each deferred task must be given just
       // its URL: queue appends its own completion callback, and .await()
       // fires only after every one of those callbacks has been called.
       // (Passing an extra callback to d3.json, as before, made d3 call that
       // one instead, so the map could be drawn before the counts existed and
       // .await() never ran.)
       queue()
        .defer(d3.json, "http://localhost:3000/us.json")
        .defer(d3.json, "http://localhost:3000/output.json")
        .await(function(error, us, responses) {
          if (error) {
            console.error("Failed to load map data:", error);
            return;
          }
          responses["States"].forEach(function(state) {
            // `pop` is stored as a string in output.json; keep it numeric here.
            rateById.set(state.name, +state.pop);
          });
          render(null, us);
        });


    /**
     * Draw the state choropleth, its hover tooltip, the two captions and the
     * colour legend into #map.
     *
     * Reads the surrounding script's `svg`, `width`, `height`, `quantize`
     * and `rateById`.
     *
     * @param {*} error - Load error, if any; when truthy nothing is drawn.
     * @param {Object} us - TopoJSON topology containing a `states` object.
     *     Each state feature is expected to carry `properties.name`, which
     *     is the key used to look its value up in `rateById`.
     */
    function render(error, us) {
      if (error) {
        console.error("Unable to render map:", error);
        return;
      }

      var stateFeatures = topojson.feature(us, us.objects.states).features;

      var projection = d3.geo.albersUsa()
                      .scale(width)
                      .translate([width / 2, height / 2]);

      var path = d3.geo.path().projection(projection);

      // Created hidden so an empty tooltip box is not shown before the first
      // hover.
      var tooltip = d3.select("#map").append("div")
          .attr("class", "tooltip hidden");

      // Numeric value recorded for a state feature, or null when there is no
      // entry for its name.
      function responsesFor(d) {
        var value = rateById.get(d.properties.name);
        return value == null ? null : +value;
      }

      svg.append("g")
          .attr("class", "states")
          .selectAll("path")
          .data(stateFeatures)
          .enter().append("path")
          .attr("class", function(d) {
            // Returning null leaves the path without a colour class, so it
            // keeps the default `path` fill.
            var value = responsesFor(d);
            return value === null ? null : quantize(value);
          })
          .attr("d", path)
          .on('mousemove', function(d) {
            var mouse = d3.mouse(this);
            var value = responsesFor(d);

            // Fill and un-hide the tooltip first so the size measured below
            // is that of the text now being shown, not the previous state's.
            tooltip
              .classed("hidden", false)
              .html(d.properties.name + "<br>" + (value === null ? "no data" : Math.round(value)));

            var tooltipWidth = $(".tooltip").width();
            var tooltipHeight = $(".tooltip").height();
            var widthFactor = $(window).width() - $("#map").width();

            tooltip.attr("style", "left:"+(mouse[0]-(tooltipWidth/2) + widthFactor)+"px;top:"+(mouse[1]-(tooltipHeight*2))+"px");
          })
          .on("mouseout", function() {
            tooltip.classed("hidden", true);
          });

      svg.append("text")
        .attr("x", width / 2)
        .attr("y", 250)
        .style("text-anchor", "middle")
        .text("Responses by State from the Behavioral Risk Factor Surveillance System Survey")
        .attr("class", "title");

      svg.append("text")
        .attr("x", width / 2)
        .attr("y", height / 2)
        .style("text-anchor", "middle")
        .text("United States of America")
        .attr("class", "overlay");

      var legend = d3.select('#map').append('div')
        .attr('id', 'legend')
        .append('ul')
        .attr('class', 'list-inline');

      var keys = legend.selectAll('li.key')
        .data(quantize.range());

      // One entry per colour class, labelled with the lower bound of the
      // value range that maps to it.
      keys.enter().append('li')
        .attr('class', function(d) { return "key " + d + "-l"; })
        .text(function(d) {
          var r = quantize.invertExtent(d);
          return r[0];
        });
    }


</script>