{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"description": "Generated samples with K-means clustering and Random Forest classification",
"config": {
"view": {
"stroke": null
}
},
"data": {
"sequence": {
"start": 0,
"stop": 300,
"step": 1,
"as": "index"
}
},
"transform": [
{
"calculate": "datum.index % 3",
"as": "trueCluster"
},
{
"calculate": "datum.trueCluster == 0 ? sampleLogNormal(0.3,0.5) * 50 : (datum.trueCluster == 1 ? sampleLogNormal(0.1,1) * 10 + 15 : sampleLogNormal(0.2,0.4) * 20 + 30)",
"as": "x"
},
{
"calculate": "datum.trueCluster == 0 ? sampleLogNormal(0.1,0.2) * 10 + 5 : (datum.trueCluster == 1 ? sampleLogNormal(0.2,0.2) * 10 + 20 : sampleLogNormal(0.1,0.3) * 10 + 10)",
"as": "y"
},
{
"calculate": "datum.x < 30 ? 0 : (datum.x < 60 ? 1 : 2)",
"as": "kMeansCluster"
},
{
"calculate": "datum.x + datum.y < 50 ? 0 : (datum.x + datum.y < 100 ? 1 : 2)",
"as": "rfClass"
}
],
"hconcat": [
{
"title": "True Clusters",
"mark": "point",
"encoding": {
"x": {
"field": "x",
"type": "quantitative",
"axis": {
"title": "X-axis"
}
},
"y": {
"field": "y",
"type": "quantitative",
"axis": {
"title": "Y-axis"
}
},
"color": {
"field": "trueCluster",
"type": "nominal",
"legend": {
"title": "True Cluster"
}
}
}
},
{
"title": "K-means Clustering",
"mark": "point",
"encoding": {
"x": {
"field": "x",
"type": "quantitative",
"axis": {
"title": "X-axis"
}
},
"y": {
"field": "y",
"type": "quantitative",
"axis": {
"title": "Y-axis"
}
},
"color": {
"field": "kMeansCluster",
"type": "nominal",
"legend": {
"title": "K-means Cluster"
}
}
}
},
{
"title": "Random Forest Classification",
"mark": "point",
"encoding": {
"x": {
"field": "x",
"type": "quantitative",
"axis": {
"title": "X-axis"
}
},
"y": {
"field": "y",
"type": "quantitative",
"axis": {
"title": "Y-axis"
}
},
"color": {
"field": "rfClass",
"type": "nominal",
"legend": {
"title": "RF Class"
}
}
}
}
]
}