Follow ______ on Twitter.

8.2.3 Integration of data mining and Big Data analytics

Code

Vega-Lite Chart

{
  "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
  "description": "Generated samples with K-means clustering and Random Forest classification",
  "config": {
    "view": {
      "stroke": null
    }
  },
  "data": {
    "sequence": {
      "start": 0,
      "stop": 300,
      "step": 1,
      "as": "index"
    }
  },
  "transform": [
    {
      "calculate": "datum.index % 3",
      "as": "trueCluster"
    },
    {
      "calculate": "datum.trueCluster == 0 ? sampleLogNormal(0.3,0.5) * 50 : (datum.trueCluster == 1 ? sampleLogNormal(0.1,1) * 10 + 15 : sampleLogNormal(0.2,0.4) * 20 + 30)",
      "as": "x"
    },
    {
      "calculate": "datum.trueCluster == 0 ? sampleLogNormal(0.1,0.2) * 10 + 5 : (datum.trueCluster == 1 ? sampleLogNormal(0.2,0.2) * 10 + 20 : sampleLogNormal(0.1,0.3) * 10 + 10)",
      "as": "y"
    },
    {
      "calculate": "datum.x < 30 ? 0 : (datum.x < 60 ? 1 : 2)",
      "as": "kMeansCluster"
    },
    {
      "calculate": "datum.x + datum.y < 50 ? 0 : (datum.x + datum.y < 100 ? 1 : 2)",
      "as": "rfClass"
    }
  ],
  "hconcat": [
    {
      "title": "True Clusters",
      "mark": "point",
      "encoding": {
        "x": {
          "field": "x",
          "type": "quantitative",
          "axis": {
            "title": "X-axis"
          }
        },
        "y": {
          "field": "y",
          "type": "quantitative",
          "axis": {
            "title": "Y-axis"
          }
        },
        "color": {
          "field": "trueCluster",
          "type": "nominal",
          "legend": {
            "title": "True Cluster"
          }
        }
      }
    },
    {
      "title": "K-means Clustering",
      "mark": "point",
      "encoding": {
        "x": {
          "field": "x",
          "type": "quantitative",
          "axis": {
            "title": "X-axis"
          }
        },
        "y": {
          "field": "y",
          "type": "quantitative",
          "axis": {
            "title": "Y-axis"
          }
        },
        "color": {
          "field": "kMeansCluster",
          "type": "nominal",
          "legend": {
            "title": "K-means Cluster"
          }
        }
      }
    },
    {
      "title": "Random Forest Classification",
      "mark": "point",
      "encoding": {
        "x": {
          "field": "x",
          "type": "quantitative",
          "axis": {
            "title": "X-axis"
          }
        },
        "y": {
          "field": "y",
          "type": "quantitative",
          "axis": {
            "title": "Y-axis"
          }
        },
        "color": {
          "field": "rfClass",
          "type": "nominal",
          "legend": {
            "title": "RF Class"
          }
        }
      }
    }
  ]
}