Basic usage of the package.
First, let’s create 5 clusters of two-dimensional points, normally distributed around centers 1 to 5, with a standard deviation of 0.3:
## id V1 V2 true_clust
## 1 1 1.2104301 1.3683836 1
## 2 2 1.4491215 0.4625674 1
## 3 3 1.2559885 0.9372232 1
## 4 4 0.8384915 0.7257761 1
## 5 5 1.3445816 0.8939351 1
## 6 6 1.3381513 1.7039413 1
## 7 7 0.6435033 1.3375870 1
## 8 8 0.8303762 1.0858607 1
## 9 9 0.2114928 0.9388908 1
## 10 10 1.1833862 1.1472999 1
## 11 11 1.4171330 1.6443124 1
## 12 12 1.3276149 1.3874632 1
## 13 13 1.5965528 1.3105140 1
## 14 14 0.7908713 0.3863466 1
## 15 15 1.1231271 1.0351274 1
## 16 16 1.0951040 0.8324043 1
## 17 17 1.4269767 0.7685852 1
## 18 18 0.6916302 1.2483960 1
## 19 19 0.9714620 0.6577174 1
## 20 20 1.3784464 1.5836677 1
## 21 21 0.7969110 1.2915563 1
## 22 22 1.2691834 0.7648148 1
## 23 23 1.7154622 0.8518827 1
## 24 24 1.1913115 0.9343243 1
## 25 25 1.1243154 0.9461786 1
## 26 26 0.8732431 0.9824090 1
## 27 27 1.3918277 1.0916876 1
## 28 28 1.5159796 1.2975739 1
## 29 29 1.5274221 1.2271982 1
## 30 30 1.0974767 0.9427814 1
## 31 31 1.2828950 0.7675730 1
## 32 32 1.3499113 0.4990147 1
## 33 33 0.6995950 1.7106876 1
## 34 34 1.1639380 1.4218098 1
## 35 35 0.9888706 0.7425405 1
## 36 36 1.1338190 1.2437757 1
## 37 37 0.8432348 1.1731891 1
## 38 38 0.8849448 1.2731927 1
## 39 39 0.8983317 1.2373607 1
## 40 40 0.7187551 0.9990338 1
## 41 41 0.4915366 1.2359479 1
## 42 42 1.5570788 0.7139513 1
## 43 43 0.6810858 1.7778102 1
## 44 44 0.9350087 1.2944691 1
## 45 45 0.7820555 0.9061553 1
## 46 46 1.3198188 0.6327240 1
## 47 47 1.2693935 1.2556958 1
## 48 48 1.2655209 0.7677790 1
## 49 49 1.0492183 1.0071171 1
## 50 50 1.3436999 0.8914861 1
## 51 51 0.8649826 0.4393002 1
## 52 52 0.8373202 1.2861909 1
## 53 53 1.3380771 0.9031222 1
## 54 54 1.0646184 0.8269146 1
## 55 55 1.0562299 1.1478297 1
## 56 56 0.7874496 0.6551511 1
## 57 57 0.8406047 0.8766421 1
## 58 58 1.3362826 1.0910906 1
## 59 59 0.8672484 0.9945076 1
## 60 60 1.1358433 0.6998124 1
## 61 61 0.9683520 0.6204357 1
## 62 62 1.0711068 0.7565281 1
## 63 63 0.8514140 1.5520788 1
## 64 64 1.2442232 0.6942918 1
## 65 65 1.2103619 1.1214371 1
## 66 66 1.0695594 0.8500226 1
## 67 67 1.1367212 1.0370786 1
## 68 68 0.9939847 1.0128525 1
## 69 69 1.2033169 1.1006504 1
## 70 70 1.0782586 0.6374707 1
## 71 71 0.2288410 0.6416650 1
## 72 72 1.2769338 1.0937538 1
## 73 73 1.1421456 0.9497381 1
## 74 74 0.7868260 1.1260806 1
## 75 75 1.0532944 1.0440163 1
## 76 76 0.5311549 0.7625642 1
## 77 77 0.8498302 1.3852686 1
## 78 78 0.8344997 0.9354545 1
## 79 79 1.2074861 0.2554480 1
## 80 80 0.7307841 1.1591555 1
## 81 81 0.8972436 1.1210308 1
## 82 82 1.2554231 0.5900796 1
## 83 83 1.2608541 0.8421847 1
## 84 84 0.8130890 1.0557354 1
## 85 85 0.8404473 0.8711645 1
## 86 86 0.8349969 1.4003360 1
## 87 87 1.0816576 1.4512211 1
## 88 88 0.9149284 1.5146929 1
## 89 89 0.9911115 0.8629948 1
## 90 90 0.7919034 0.8436254 1
## 91 91 0.4852926 0.9230567 1
## 92 92 1.2506452 1.0277350 1
## 93 93 0.9789287 0.8972017 1
## 94 94 1.4397487 0.7961277 1
## 95 95 1.3050221 0.9499535 1
## 96 96 0.7019074 1.3787199 1
## 97 97 0.6176745 1.1109013 1
## 98 98 0.8390911 0.8890517 1
## 99 99 0.6900021 0.7087577 1
## 100 100 0.6799433 0.8882345 1
## 101 101 2.0391639 2.0181653 2
## 102 102 2.4388016 2.4557976 2
## 103 103 2.0174240 2.5521841 2
## 104 104 1.8587122 1.4773660 2
## 105 105 2.2228069 1.8643797 2
## 106 106 2.2205658 2.5838951 2
## 107 107 2.3288492 2.3399334 2
## 108 108 1.8282585 1.7239360 2
## 109 109 1.9028747 1.7636406 2
## 110 110 2.1315162 1.8946201 2
## 111 111 1.9070960 2.5482367 2
## 112 112 1.3471719 1.9344844 2
## 113 113 2.3481745 2.6074791 2
## 114 114 1.8829352 2.0381267 2
## 115 115 2.0074736 2.1634433 2
## 116 116 1.5513230 2.4508656 2
## 117 117 2.0193820 1.9183353 2
## 118 118 2.0312597 2.3190341 2
## 119 119 1.8542131 2.2321659 2
## 120 120 1.9060893 2.0646348 2
## 121 121 2.4722898 1.9740046 2
## 122 122 1.8770491 1.9627591 2
## 123 123 2.0650205 2.3379996 2
## 124 124 1.8670126 2.4002337 2
## 125 125 1.7917625 2.4462792 2
## 126 126 1.7968311 2.1800546 2
## 127 127 1.8854365 1.7858832 2
## 128 128 2.0310415 1.9465927 2
## 129 129 2.1572609 1.3434804 2
## 130 130 1.7041422 2.0584772 2
## 131 131 1.9672706 2.1634058 2
## 132 132 1.3957523 2.2280470 2
## 133 133 1.9491033 1.8333605 2
## 134 134 2.7426726 1.8499737 2
## 135 135 2.3808409 1.6649506 2
## 136 136 2.1683032 2.1805902 2
## 137 137 1.8711416 2.3592386 2
## 138 138 2.0285836 2.2770725 2
## 139 139 1.8092272 2.4288718 2
## 140 140 1.5577312 2.2269449 2
## 141 141 1.4773338 2.0939169 2
## 142 142 1.5450671 2.0904800 2
## 143 143 1.8244810 2.0558441 2
## 144 144 1.6451175 1.5896367 2
## 145 145 2.3707862 2.3826735 2
## 146 146 2.3383979 2.0764678 2
## 147 147 2.2404646 1.9210600 2
## 148 148 1.9270636 1.7799585 2
## 149 149 1.9005145 2.0439734 2
## 150 150 2.4991068 1.8043598 2
## 151 151 2.6816245 1.8650372 2
## 152 152 1.9831453 2.3908543 2
## 153 153 1.4493428 2.1726293 2
## 154 154 1.9927737 1.9988305 2
## 155 155 1.7215717 2.1145590 2
## 156 156 1.3711692 1.8178593 2
## 157 157 2.0527785 1.8409058 2
## 158 158 1.8687704 1.8579349 2
## 159 159 2.0012288 2.3898236 2
## 160 160 2.3538437 1.9176856 2
## 161 161 2.1268875 1.9789530 2
## 162 162 1.9698392 1.9783816 2
## 163 163 2.0237972 1.7201793 2
## 164 164 1.9978177 1.6915337 2
## 165 165 1.7981725 2.3774763 2
## 166 166 1.5054348 1.7052174 2
## 167 167 1.8698982 2.1030603 2
## 168 168 2.1865770 1.7654210 2
## 169 169 2.2238929 2.2340106 2
## 170 170 2.4256951 1.6375617 2
## 171 171 1.9488542 2.3069497 2
## 172 172 2.2238790 1.9878563 2
## 173 173 1.8948674 2.1295271 2
## 174 174 1.8754898 1.6970517 2
## 175 175 1.7590136 1.9580199 2
## 176 176 1.8026681 1.8304441 2
## 177 177 1.7497553 1.9836410 2
## 178 178 1.6672878 1.8854531 2
## 179 179 2.0736994 1.8903495 2
## 180 180 2.6008386 2.0007398 2
## 181 181 1.7572684 1.8883062 2
## 182 182 2.3615302 2.8693811 2
## 183 183 1.6699534 1.8502708 2
## 184 184 1.7321898 2.0740680 2
## 185 185 2.2498669 2.0181920 2
## 186 186 2.2532141 1.9559326 2
## 187 187 1.8039143 2.0612574 2
## 188 188 2.2792179 2.1759185 2
## 189 189 1.5791727 1.3624400 2
## 190 190 1.7931790 2.0183778 2
## 191 191 1.8332814 1.7607641 2
## 192 192 1.8987932 1.8791867 2
## 193 193 1.6623870 1.8733202 2
## 194 194 1.5725394 1.7035323 2
## 195 195 1.5807183 2.1476385 2
## 196 196 2.0456221 1.9865136 2
## 197 197 1.7354827 1.8701078 2
## 198 198 2.2273729 2.3203348 2
## 199 199 1.7379647 2.2614389 2
## 200 200 1.8285528 1.8891335 2
## 201 201 3.0228406 2.5742012 3
## 202 202 2.8264634 2.5193997 3
## 203 203 2.9795499 2.9430383 3
## 204 204 2.5758978 3.3117884 3
## 205 205 2.9320324 3.2966298 3
## 206 206 2.6298736 3.0509339 3
## 207 207 3.5191178 3.1996077 3
## 208 208 3.3145599 2.8849450 3
## 209 209 3.2691203 2.6418992 3
## 210 210 2.7862355 3.6152586 3
## 211 211 2.6568845 3.7019399 3
## 212 212 2.9425532 3.1743587 3
## 213 213 2.5443905 2.6887054 3
## 214 214 3.0179333 4.0055047 3
## 215 215 3.1292554 2.7458439 3
## 216 216 3.0196192 3.2530946 3
## 217 217 2.9128796 3.0726575 3
## 218 218 3.6563366 3.2661185 3
## 219 219 2.6945793 3.1150197 3
## 220 220 3.4230353 2.9653637 3
## 221 221 3.1687852 2.7597259 3
## 222 222 2.7532207 2.6670718 3
## 223 223 2.4448156 2.9681616 3
## 224 224 2.4300829 2.9822555 3
## 225 225 2.8616584 3.1300119 3
## 226 226 3.2975809 2.6457393 3
## 227 227 3.0014331 3.2918639 3
## 228 228 3.7418102 2.7802559 3
## 229 229 2.6969278 2.5702803 3
## 230 230 3.3951944 3.0383523 3
## 231 231 3.3107304 2.8405802 3
## 232 232 2.8476121 3.0831595 3
## 233 233 3.1908668 2.9201190 3
## 234 234 2.3890699 3.1932735 3
## 235 235 3.1141137 3.0658503 3
## 236 236 2.6316894 2.8234095 3
## 237 237 3.3142624 2.9993528 3
## 238 238 3.1911930 3.1066114 3
## 239 239 2.9683849 3.4586191 3
## 240 240 2.6095891 2.9251905 3
## 241 241 3.1909567 3.2204691 3
## 242 242 3.1804196 3.8110207 3
## 243 243 2.8824807 2.6830037 3
## 244 244 3.0821712 2.8421912 3
## 245 245 2.7967161 3.2160224 3
## 246 246 3.2738716 2.9243666 3
## 247 247 3.0570122 3.2224496 3
## 248 248 2.7290787 2.8826562 3
## 249 249 3.0741243 3.2942872 3
## 250 250 2.7109808 3.0250313 3
## 251 251 2.7592079 2.7649743 3
## 252 252 2.8173936 3.0421431 3
## 253 253 2.0893164 2.4664163 3
## 254 254 3.2908883 3.3662660 3
## 255 255 3.3613008 3.7782209 3
## 256 256 2.9399723 3.0523287 3
## 257 257 3.2103825 2.6404745 3
## 258 258 3.0918192 3.1156866 3
## 259 259 2.9687267 3.4040006 3
## 260 260 2.8655536 2.5524561 3
## 261 261 2.9082046 2.6380456 3
## 262 262 2.5406484 2.9733453 3
## 263 263 3.1332768 3.3851481 3
## 264 264 2.5979987 3.1081696 3
## 265 265 3.2099134 3.2326707 3
## 266 266 2.1269733 2.5703307 3
## 267 267 2.9056134 2.7590293 3
## 268 268 2.9144735 2.8913101 3
## 269 269 2.9409252 3.6665764 3
## 270 270 2.1816313 3.0306729 3
## 271 271 3.0241775 3.3440866 3
## 272 272 2.8901780 2.8408377 3
## 273 273 2.7263616 2.7822060 3
## 274 274 3.3628130 3.3399851 3
## 275 275 2.9206051 2.9606082 3
## 276 276 3.3774914 3.4511808 3
## 277 277 3.0255056 3.2377552 3
## 278 278 2.8056764 3.3026700 3
## 279 279 3.1158295 2.7345826 3
## 280 280 3.0193448 2.6935288 3
## 281 281 2.3713686 2.8172566 3
## 282 282 2.3228749 3.2678787 3
## 283 283 3.1443417 2.6714102 3
## 284 284 2.9217835 2.9460468 3
## 285 285 2.9346401 2.6732815 3
## 286 286 3.1988354 2.8717862 3
## 287 287 2.6853385 2.9613781 3
## 288 288 3.1631581 3.2231121 3
## 289 289 2.4083477 3.1599477 3
## 290 290 3.0519035 2.3891263 3
## 291 291 2.8201274 3.1464579 3
## 292 292 3.1368296 3.0520403 3
## 293 293 3.0997534 2.8530752 3
## 294 294 3.2923619 3.0868825 3
## 295 295 2.7540963 3.2833736 3
## 296 296 2.3546711 2.8627440 3
## 297 297 2.7946858 3.0152221 3
## 298 298 3.1415943 3.1597159 3
## 299 299 2.7195751 2.8931792 3
## 300 300 3.0756822 3.3554485 3
## 301 301 3.6884516 3.7221189 4
## 302 302 3.8288771 3.9053497 4
## 303 303 3.9793156 3.5152524 4
## 304 304 4.2314435 3.7308093 4
## 305 305 3.9485961 3.3284119 4
## 306 306 3.6326828 4.0357080 4
## 307 307 4.0643474 3.9678727 4
## 308 308 3.7726889 4.1642610 4
## 309 309 4.2834901 4.1102333 4
## 310 310 3.9514273 4.2101962 4
## 311 311 4.1346328 3.9975055 4
## 312 312 3.7704318 3.9629276 4
## 313 313 3.6205448 4.0448399 4
## 314 314 4.0025090 3.9998914 4
## 315 315 3.6621169 3.9393469 4
## 316 316 4.7560493 4.2881091 4
## 317 317 4.1279586 4.2358091 4
## 318 318 3.8563616 3.9070165 4
## 319 319 3.9356267 4.0960128 4
## 320 320 4.1732335 4.2190993 4
## 321 321 4.2129027 4.2557813 4
## 322 322 4.5449944 4.1437936 4
## 323 323 3.6240873 4.5588799 4
## 324 324 4.2809168 3.8955731 4
## 325 325 3.5159098 4.1164808 4
## 326 326 4.0959180 3.7298479 4
## 327 327 3.8211777 4.2507414 4
## 328 328 3.8113820 4.4978229 4
## 329 329 4.5300123 3.9777361 4
## 330 330 4.1847565 3.9363343 4
## 331 331 3.9374313 3.4318420 4
## 332 332 4.6762341 4.0253805 4
## 333 333 3.3632409 4.2158958 4
## 334 334 3.8811024 3.3110981 4
## 335 335 3.5423012 3.8583800 4
## 336 336 4.3320430 3.6152731 4
## 337 337 4.3603633 4.2065882 4
## 338 338 4.2168854 4.1724254 4
## 339 339 4.2827776 4.0712652 4
## 340 340 3.4197209 3.9426643 4
## 341 341 3.5634185 3.4434580 4
## 342 342 4.2604882 4.1764107 4
## 343 343 4.6134920 4.0624458 4
## 344 344 4.2002316 4.0790578 4
## 345 345 3.7772521 4.3783260 4
## 346 346 3.6464681 3.6843470 4
## 347 347 4.5281357 4.1168485 4
## 348 348 4.2963845 3.6576726 4
## 349 349 3.4156272 4.1575434 4
## 350 350 3.6951108 4.2392037 4
## 351 351 3.4398336 4.2966140 4
## 352 352 3.6976667 4.4101329 4
## 353 353 4.1619451 4.1456427 4
## 354 354 3.9077289 3.9741202 4
## 355 355 3.6214215 4.1272374 4
## 356 356 4.4873152 4.5416221 4
## 357 357 4.3606232 4.2723192 4
## 358 358 3.5460653 4.2232890 4
## 359 359 3.5559428 3.8824824 4
## 360 360 4.1074274 4.6042786 4
## 361 361 3.7278963 4.1619784 4
## 362 362 4.1190905 3.8522400 4
## 363 363 3.7968741 3.4320511 4
## 364 364 3.7079632 4.0477676 4
## 365 365 3.9656941 4.0240688 4
## 366 366 4.4880741 3.8327801 4
## 367 367 4.1267916 4.0539403 4
## 368 368 3.7907997 3.9493458 4
## 369 369 3.7213180 3.6928604 4
## 370 370 4.6216621 4.6851062 4
## 371 371 4.3557618 3.6822297 4
## 372 372 4.4629404 4.1302436 4
## 373 373 3.9019839 4.3488407 4
## 374 374 3.8233173 3.9189693 4
## 375 375 4.3239639 3.4663861 4
## 376 376 3.8246406 3.8119997 4
## 377 377 3.6997411 4.1857216 4
## 378 378 3.8532408 4.0260158 4
## 379 379 3.6122314 4.0729654 4
## 380 380 3.5028944 3.8320522 4
## 381 381 3.7593790 3.9948957 4
## 382 382 4.3323486 4.1757377 4
## 383 383 3.5453701 4.1819934 4
## 384 384 3.8341699 4.1337212 4
## 385 385 3.5454753 3.9049875 4
## 386 386 3.9949851 4.3941884 4
## 387 387 3.6725118 4.2897380 4
## 388 388 3.9756914 4.0404751 4
## 389 389 4.1410160 3.9063258 4
## 390 390 4.3205476 3.5994082 4
## 391 391 4.3804207 3.7896964 4
## 392 392 3.9929013 3.9956640 4
## 393 393 3.8124848 4.1960163 4
## 394 394 3.9584686 3.7715580 4
## 395 395 4.2983172 4.8471691 4
## 396 396 4.3698917 3.9677632 4
## 397 397 4.0991057 4.1926265 4
## 398 398 3.9928729 4.1775785 4
## 399 399 3.9432750 4.1233490 4
## 400 400 4.4524295 3.7825198 4
## 401 401 5.5133063 5.0450294 5
## 402 402 5.1019782 4.8001901 5
## 403 403 5.1172691 4.9525798 5
## 404 404 5.1910605 4.4270148 5
## 405 405 5.2762189 4.7392097 5
## 406 406 5.7538329 5.1287255 5
## 407 407 5.3225482 4.4610521 5
## 408 408 4.5381460 4.6968687 5
## 409 409 4.5936500 4.9350762 5
## 410 410 5.1226113 4.7935962 5
## 411 411 4.8943111 4.6335769 5
## 412 412 5.4567585 4.5135088 5
## 413 413 5.7292191 5.0221079 5
## 414 414 4.9176475 4.8675722 5
## 415 415 5.1472754 5.2588447 5
## 416 416 4.6608851 4.8356509 5
## 417 417 5.4573166 5.1031367 5
## 418 418 4.9670786 5.0988470 5
## 419 419 5.1181242 5.1269276 5
## 420 420 4.6896242 5.0606529 5
## 421 421 4.4276857 4.7738091 5
## 422 422 4.6688578 5.2349823 5
## 423 423 4.6604069 5.2767647 5
## 424 424 5.2292148 4.8734313 5
## 425 425 5.2921353 5.1933868 5
## 426 426 4.8161732 5.1981492 5
## 427 427 4.8976412 5.0642983 5
## 428 428 5.2836220 4.6629048 5
## 429 429 4.8955325 5.6683789 5
## 430 430 5.0337536 4.6593043 5
## 431 431 5.0759803 4.9800395 5
## 432 432 4.9067557 5.0370765 5
## 433 433 4.7416282 4.8916794 5
## 434 434 4.9160477 5.0655779 5
## 435 435 4.7819251 5.0270777 5
## 436 436 5.0567739 5.0870725 5
## 437 437 4.8634289 5.2379301 5
## 438 438 5.0721565 5.0963812 5
## 439 439 5.2079232 5.1998098 5
## 440 440 4.7704942 5.1582392 5
## 441 441 4.6746363 4.7272163 5
## 442 442 4.4621512 4.6310220 5
## 443 443 4.9204140 5.4607818 5
## 444 444 4.5866643 4.9764021 5
## 445 445 5.1278497 5.3497267 5
## 446 446 4.9780524 4.9725658 5
## 447 447 4.5841538 5.3399144 5
## 448 448 4.8168370 5.0250592 5
## 449 449 5.1074989 4.7561170 5
## 450 450 4.4419290 4.8718176 5
## 451 451 5.2725587 5.0118051 5
## 452 452 4.7283077 5.1266616 5
## 453 453 5.4874816 4.9463756 5
## 454 454 5.1043362 5.0618757 5
## 455 455 4.6466943 5.0227658 5
## 456 456 5.1278688 4.9726940 5
## 457 457 5.3199372 4.8624794 5
## 458 458 5.5166335 5.0561581 5
## 459 459 5.0498237 5.1415792 5
## 460 460 5.1769154 5.3066221 5
## 461 461 4.6530496 4.8915139 5
## 462 462 5.0045274 5.0384046 5
## 463 463 4.8735601 4.5343010 5
## 464 464 4.4360070 4.9546956 5
## 465 465 5.1534273 4.9487389 5
## 466 466 4.5508130 4.7018845 5
## 467 467 4.5984322 4.8850922 5
## 468 468 4.7681095 5.2515668 5
## 469 469 5.2606449 4.5093633 5
## 470 470 4.9324109 4.7596517 5
## 471 471 5.3014319 4.9238760 5
## 472 472 4.3784635 5.3527621 5
## 473 473 4.5349217 5.0800590 5
## 474 474 4.5606143 4.4334509 5
## 475 475 4.5418760 5.3025480 5
## 476 476 5.0434911 5.1425728 5
## 477 477 4.9617557 5.5410689 5
## 478 478 5.1177725 4.7492209 5
## 479 479 4.6515451 5.2139247 5
## 480 480 4.9646100 4.9970420 5
## 481 481 5.4265762 4.6943047 5
## 482 482 5.2314653 5.0259304 5
## 483 483 5.5465440 5.3743223 5
## 484 484 5.1888457 4.6135011 5
## 485 485 4.6169163 5.2088242 5
## 486 486 4.9429869 4.8267085 5
## 487 487 4.8556434 4.4610117 5
## 488 488 4.8040523 4.3878327 5
## 489 489 5.3077146 4.9592097 5
## 490 490 5.0213129 4.9233404 5
## 491 491 5.4606565 4.9351379 5
## 492 492 4.9922979 5.5963421 5
## 493 493 4.8246139 5.4308834 5
## 494 494 4.6460268 5.5015898 5
## 495 495 4.9556460 5.4008026 5
## 496 496 4.9442921 5.0929200 5
## 497 497 5.1365860 5.6318518 5
## 498 498 4.9740604 4.9784884 5
## 499 499 4.8770270 4.9628846 5
## 500 500 5.0317630 4.9630071 5
This is what our data looks like:
data %>% ggplot(aes(x = V1, y = V2, color = factor(true_clust))) +
geom_point() +
scale_color_discrete(name = "true cluster")
Now we can cluster it using kmeans++:
data_for_clust <- data %>% select(id, starts_with("V"))
km <- TGL_kmeans_tidy(data_for_clust,
k = 5,
metric = "euclid",
verbose = TRUE
)
## id column: id
## KMEans: will generate seeds
## KMeans into generate seeds
## at seed 0
## add new core from 82 to 0
## at seed 1
## done update min distance
## seed range 350 450
## picked up 440 dist was 2.59777
## add new core from 440 to 1
## at seed 2
## done update min distance
## seed range 300 400
## picked up 274 dist was 1.3275
## add new core from 274 to 2
## at seed 3
## done update min distance
## seed range 250 350
## picked up 374 dist was 0.729166
## add new core from 374 to 3
## at seed 4
## done update min distance
## seed range 200 300
## picked up 191 dist was 0.613397
## add new core from 191 to 4
## KMEans: reassign after init
## KMEans: iter 0
## KMEans: iter 1 changed 3
## KMEans: iter 1
## KMEans: iter 2 changed 4
## KMEans: iter 2
## KMEans: iter 3 changed 0
The returned list contains 3 fields:
## [1] "centers" "cluster" "size"
`km$centers` contains a tibble with a `clust` column and the cluster centers:
## # A tibble: 5 × 3
## clust V1 V2
## <int> <dbl> <dbl>
## 1 1 1.03 1.00
## 2 2 1.95 2.04
## 3 3 3.94 4.00
## 4 4 4.96 4.98
## 5 5 2.94 3.02
Clusters are numbered according to `reorder_func` (see the ‘Custom cluster ordering’ section).
`km$cluster` contains a tibble with an `id` column holding the observation id (`1:n` if no id column was supplied), and a `clust` column holding the cluster assigned to each observation:
## # A tibble: 500 × 2
## id clust
## <chr> <int>
## 1 1 1
## 2 2 1
## 3 3 1
## 4 4 1
## 5 5 1
## 6 6 2
## 7 7 1
## 8 8 1
## 9 9 1
## 10 10 1
## # ℹ 490 more rows
`km$size` contains a tibble with a `clust` column and an `n` column with the number of points in each cluster:
## # A tibble: 5 × 2
## clust n
## <int> <int>
## 1 1 99
## 2 2 102
## 3 3 99
## 4 4 104
## 5 5 96
We can now check our clustering performance, i.e. the fraction of observations that were classified correctly (note that the `match_clusters` function is internal to the package and is used only in this vignette):
d <- tglkmeans:::match_clusters(data, km, 5)
sum(d$true_clust == d$new_clust, na.rm = TRUE) / sum(!is.na(d$new_clust))
## [1] 0.974
And plot the results:
d %>% ggplot(aes(x = V1, y = V2, color = factor(new_clust), shape = factor(true_clust))) +
geom_point() +
scale_color_discrete(name = "cluster") +
scale_shape_discrete(name = "true cluster") +
geom_point(data = km$centers, size = 7, color = "black", shape = "X")
By default, the clusters were ordered using the following function: `hclust(dist(cor(t(centers))))` - hclust of the Euclidean distance of the correlation matrix of the centers.
We can supply our own function to order the clusters using the `reorder_func` argument. The function is applied to each center, and the clusters are ordered by the result.
km <- TGL_kmeans_tidy(data %>% select(id, starts_with("V")),
k = 5,
metric = "euclid",
verbose = FALSE,
reorder_func = median
)
km$centers
## # A tibble: 5 × 3
## clust V1 V2
## <int> <dbl> <dbl>
## 1 1 1.05 0.963
## 2 2 1.95 2.06
## 3 3 2.93 3.04
## 4 4 3.88 4.04
## 5 5 4.98 5.00
tglkmeans can deal with missing data, as long as at least one dimension is not missing. For example:
## id V1 V2 true_clust
## 1 1 1.2104301 1.3683836 1
## 2 2 1.4491215 0.4625674 1
## 3 3 NA 0.9372232 1
## 4 4 0.8384915 0.7257761 1
## 5 5 NA 0.8939351 1
## 6 6 1.3381513 1.7039413 1
## 7 7 NA 1.3375870 1
## 8 8 NA 1.0858607 1
## 9 9 0.2114928 0.9388908 1
## 10 10 1.1833862 1.1472999 1
## 11 11 1.4171330 1.6443124 1
## 12 12 1.3276149 1.3874632 1
## 13 13 1.5965528 1.3105140 1
## 14 14 0.7908713 0.3863466 1
## 15 15 1.1231271 1.0351274 1
## 16 16 1.0951040 0.8324043 1
## 17 17 1.4269767 0.7685852 1
## 18 18 NA 1.2483960 1
## 19 19 0.9714620 0.6577174 1
## 20 20 1.3784464 1.5836677 1
## 21 21 NA 1.2915563 1
## 22 22 1.2691834 0.7648148 1
## 23 23 1.7154622 0.8518827 1
## 24 24 1.1913115 0.9343243 1
## 25 25 1.1243154 0.9461786 1
## 26 26 0.8732431 0.9824090 1
## 27 27 1.3918277 1.0916876 1
## 28 28 1.5159796 1.2975739 1
## 29 29 1.5274221 1.2271982 1
## 30 30 1.0974767 0.9427814 1
## 31 31 1.2828950 0.7675730 1
## 32 32 1.3499113 0.4990147 1
## 33 33 NA 1.7106876 1
## 34 34 1.1639380 1.4218098 1
## 35 35 0.9888706 0.7425405 1
## 36 36 NA 1.2437757 1
## 37 37 NA 1.1731891 1
## 38 38 0.8849448 1.2731927 1
## 39 39 0.8983317 1.2373607 1
## 40 40 0.7187551 0.9990338 1
## 41 41 0.4915366 1.2359479 1
## 42 42 1.5570788 0.7139513 1
## 43 43 NA 1.7778102 1
## 44 44 0.9350087 1.2944691 1
## 45 45 NA 0.9061553 1
## 46 46 1.3198188 0.6327240 1
## 47 47 1.2693935 1.2556958 1
## 48 48 1.2655209 0.7677790 1
## 49 49 1.0492183 1.0071171 1
## 50 50 1.3436999 0.8914861 1
## 51 51 0.8649826 0.4393002 1
## 52 52 NA 1.2861909 1
## 53 53 1.3380771 0.9031222 1
## 54 54 1.0646184 0.8269146 1
## 55 55 1.0562299 1.1478297 1
## 56 56 0.7874496 0.6551511 1
## 57 57 0.8406047 0.8766421 1
## 58 58 1.3362826 1.0910906 1
## 59 59 0.8672484 0.9945076 1
## 60 60 1.1358433 0.6998124 1
## 61 61 0.9683520 0.6204357 1
## 62 62 1.0711068 0.7565281 1
## 63 63 0.8514140 1.5520788 1
## 64 64 1.2442232 0.6942918 1
## 65 65 NA 1.1214371 1
## 66 66 1.0695594 0.8500226 1
## 67 67 1.1367212 1.0370786 1
## 68 68 0.9939847 1.0128525 1
## 69 69 1.2033169 1.1006504 1
## 70 70 1.0782586 0.6374707 1
## 71 71 0.2288410 0.6416650 1
## 72 72 1.2769338 1.0937538 1
## 73 73 1.1421456 0.9497381 1
## 74 74 0.7868260 1.1260806 1
## 75 75 1.0532944 1.0440163 1
## 76 76 0.5311549 0.7625642 1
## 77 77 0.8498302 1.3852686 1
## 78 78 NA 0.9354545 1
## 79 79 1.2074861 0.2554480 1
## 80 80 0.7307841 1.1591555 1
## 81 81 0.8972436 1.1210308 1
## 82 82 1.2554231 0.5900796 1
## 83 83 1.2608541 0.8421847 1
## 84 84 0.8130890 1.0557354 1
## 85 85 0.8404473 0.8711645 1
## 86 86 0.8349969 1.4003360 1
## 87 87 1.0816576 1.4512211 1
## 88 88 NA 1.5146929 1
## 89 89 0.9911115 0.8629948 1
## 90 90 0.7919034 0.8436254 1
## 91 91 0.4852926 0.9230567 1
## 92 92 1.2506452 1.0277350 1
## 93 93 0.9789287 0.8972017 1
## 94 94 1.4397487 0.7961277 1
## 95 95 1.3050221 0.9499535 1
## 96 96 0.7019074 1.3787199 1
## 97 97 0.6176745 1.1109013 1
## 98 98 0.8390911 0.8890517 1
## 99 99 0.6900021 0.7087577 1
## 100 100 0.6799433 0.8882345 1
## 101 101 2.0391639 2.0181653 2
## 102 102 2.4388016 2.4557976 2
## 103 103 2.0174240 2.5521841 2
## 104 104 1.8587122 1.4773660 2
## 105 105 2.2228069 1.8643797 2
## 106 106 NA 2.5838951 2
## 107 107 2.3288492 2.3399334 2
## 108 108 1.8282585 1.7239360 2
## 109 109 1.9028747 1.7636406 2
## 110 110 2.1315162 1.8946201 2
## 111 111 1.9070960 2.5482367 2
## 112 112 1.3471719 1.9344844 2
## 113 113 2.3481745 2.6074791 2
## 114 114 1.8829352 2.0381267 2
## 115 115 2.0074736 2.1634433 2
## 116 116 NA 2.4508656 2
## 117 117 2.0193820 1.9183353 2
## 118 118 NA 2.3190341 2
## 119 119 1.8542131 2.2321659 2
## 120 120 1.9060893 2.0646348 2
## 121 121 2.4722898 1.9740046 2
## 122 122 1.8770491 1.9627591 2
## 123 123 2.0650205 2.3379996 2
## 124 124 NA 2.4002337 2
## 125 125 1.7917625 2.4462792 2
## 126 126 NA 2.1800546 2
## 127 127 1.8854365 1.7858832 2
## 128 128 2.0310415 1.9465927 2
## 129 129 2.1572609 1.3434804 2
## 130 130 NA 2.0584772 2
## 131 131 1.9672706 2.1634058 2
## 132 132 NA 2.2280470 2
## 133 133 1.9491033 1.8333605 2
## 134 134 2.7426726 1.8499737 2
## 135 135 NA 1.6649506 2
## 136 136 2.1683032 2.1805902 2
## 137 137 NA 2.3592386 2
## 138 138 NA 2.2770725 2
## 139 139 1.8092272 2.4288718 2
## 140 140 1.5577312 2.2269449 2
## 141 141 1.4773338 2.0939169 2
## 142 142 1.5450671 2.0904800 2
## 143 143 NA 2.0558441 2
## 144 144 NA 1.5896367 2
## 145 145 NA 2.3826735 2
## 146 146 2.3383979 2.0764678 2
## 147 147 2.2404646 1.9210600 2
## 148 148 1.9270636 1.7799585 2
## 149 149 1.9005145 2.0439734 2
## 150 150 2.4991068 1.8043598 2
## 151 151 2.6816245 1.8650372 2
## 152 152 1.9831453 2.3908543 2
## 153 153 1.4493428 2.1726293 2
## 154 154 1.9927737 1.9988305 2
## 155 155 NA 2.1145590 2
## 156 156 1.3711692 1.8178593 2
## 157 157 2.0527785 1.8409058 2
## 158 158 1.8687704 1.8579349 2
## 159 159 2.0012288 2.3898236 2
## 160 160 2.3538437 1.9176856 2
## 161 161 2.1268875 1.9789530 2
## 162 162 1.9698392 1.9783816 2
## 163 163 2.0237972 1.7201793 2
## 164 164 1.9978177 1.6915337 2
## 165 165 1.7981725 2.3774763 2
## 166 166 1.5054348 1.7052174 2
## 167 167 1.8698982 2.1030603 2
## 168 168 2.1865770 1.7654210 2
## 169 169 NA 2.2340106 2
## 170 170 2.4256951 1.6375617 2
## 171 171 1.9488542 2.3069497 2
## 172 172 2.2238790 1.9878563 2
## 173 173 NA 2.1295271 2
## 174 174 1.8754898 1.6970517 2
## 175 175 1.7590136 1.9580199 2
## 176 176 1.8026681 1.8304441 2
## 177 177 NA 1.9836410 2
## 178 178 1.6672878 1.8854531 2
## 179 179 2.0736994 1.8903495 2
## 180 180 2.6008386 2.0007398 2
## 181 181 1.7572684 1.8883062 2
## 182 182 2.3615302 2.8693811 2
## 183 183 1.6699534 1.8502708 2
## 184 184 1.7321898 2.0740680 2
## 185 185 2.2498669 2.0181920 2
## 186 186 2.2532141 1.9559326 2
## 187 187 1.8039143 2.0612574 2
## 188 188 2.2792179 2.1759185 2
## 189 189 NA 1.3624400 2
## 190 190 1.7931790 2.0183778 2
## 191 191 1.8332814 1.7607641 2
## 192 192 1.8987932 1.8791867 2
## 193 193 NA 1.8733202 2
## 194 194 1.5725394 1.7035323 2
## 195 195 1.5807183 2.1476385 2
## 196 196 2.0456221 1.9865136 2
## 197 197 1.7354827 1.8701078 2
## 198 198 2.2273729 2.3203348 2
## 199 199 1.7379647 2.2614389 2
## 200 200 1.8285528 1.8891335 2
## 201 201 3.0228406 2.5742012 3
## 202 202 2.8264634 2.5193997 3
## 203 203 2.9795499 2.9430383 3
## 204 204 2.5758978 3.3117884 3
## 205 205 2.9320324 3.2966298 3
## 206 206 2.6298736 3.0509339 3
## 207 207 3.5191178 3.1996077 3
## 208 208 NA 2.8849450 3
## 209 209 NA 2.6418992 3
## 210 210 NA 3.6152586 3
## 211 211 2.6568845 3.7019399 3
## 212 212 2.9425532 3.1743587 3
## 213 213 2.5443905 2.6887054 3
## 214 214 3.0179333 4.0055047 3
## 215 215 3.1292554 2.7458439 3
## 216 216 NA 3.2530946 3
## 217 217 2.9128796 3.0726575 3
## 218 218 NA 3.2661185 3
## 219 219 2.6945793 3.1150197 3
## 220 220 3.4230353 2.9653637 3
## 221 221 3.1687852 2.7597259 3
## 222 222 2.7532207 2.6670718 3
## 223 223 2.4448156 2.9681616 3
## 224 224 2.4300829 2.9822555 3
## 225 225 2.8616584 3.1300119 3
## 226 226 3.2975809 2.6457393 3
## 227 227 3.0014331 3.2918639 3
## 228 228 3.7418102 2.7802559 3
## 229 229 2.6969278 2.5702803 3
## 230 230 NA 3.0383523 3
## 231 231 3.3107304 2.8405802 3
## 232 232 2.8476121 3.0831595 3
## 233 233 NA 2.9201190 3
## 234 234 2.3890699 3.1932735 3
## 235 235 NA 3.0658503 3
## 236 236 NA 2.8234095 3
## 237 237 3.3142624 2.9993528 3
## 238 238 NA 3.1066114 3
## 239 239 2.9683849 3.4586191 3
## 240 240 NA 2.9251905 3
## 241 241 3.1909567 3.2204691 3
## 242 242 3.1804196 3.8110207 3
## 243 243 2.8824807 2.6830037 3
## 244 244 NA 2.8421912 3
## 245 245 2.7967161 3.2160224 3
## 246 246 NA 2.9243666 3
## 247 247 3.0570122 3.2224496 3
## 248 248 NA 2.8826562 3
## 249 249 3.0741243 3.2942872 3
## 250 250 2.7109808 3.0250313 3
## 251 251 2.7592079 2.7649743 3
## 252 252 2.8173936 3.0421431 3
## 253 253 2.0893164 2.4664163 3
## 254 254 3.2908883 3.3662660 3
## 255 255 3.3613008 3.7782209 3
## 256 256 2.9399723 3.0523287 3
## 257 257 3.2103825 2.6404745 3
## 258 258 3.0918192 3.1156866 3
## 259 259 2.9687267 3.4040006 3
## 260 260 2.8655536 2.5524561 3
## 261 261 2.9082046 2.6380456 3
## 262 262 NA 2.9733453 3
## 263 263 3.1332768 3.3851481 3
## 264 264 NA 3.1081696 3
## 265 265 3.2099134 3.2326707 3
## 266 266 2.1269733 2.5703307 3
## 267 267 2.9056134 2.7590293 3
## 268 268 2.9144735 2.8913101 3
## 269 269 2.9409252 3.6665764 3
## 270 270 2.1816313 3.0306729 3
## 271 271 3.0241775 3.3440866 3
## 272 272 NA 2.8408377 3
## 273 273 2.7263616 2.7822060 3
## 274 274 NA 3.3399851 3
## 275 275 2.9206051 2.9606082 3
## 276 276 3.3774914 3.4511808 3
## 277 277 3.0255056 3.2377552 3
## 278 278 2.8056764 3.3026700 3
## 279 279 3.1158295 2.7345826 3
## 280 280 3.0193448 2.6935288 3
## 281 281 2.3713686 2.8172566 3
## 282 282 2.3228749 3.2678787 3
## 283 283 3.1443417 2.6714102 3
## 284 284 2.9217835 2.9460468 3
## 285 285 2.9346401 2.6732815 3
## 286 286 3.1988354 2.8717862 3
## 287 287 2.6853385 2.9613781 3
## 288 288 3.1631581 3.2231121 3
## 289 289 NA 3.1599477 3
## 290 290 NA 2.3891263 3
## 291 291 2.8201274 3.1464579 3
## 292 292 3.1368296 3.0520403 3
## 293 293 3.0997534 2.8530752 3
## 294 294 3.2923619 3.0868825 3
## 295 295 2.7540963 3.2833736 3
## 296 296 2.3546711 2.8627440 3
## 297 297 2.7946858 3.0152221 3
## 298 298 3.1415943 3.1597159 3
## 299 299 2.7195751 2.8931792 3
## 300 300 3.0756822 3.3554485 3
## 301 301 NA 3.7221189 4
## 302 302 3.8288771 3.9053497 4
## 303 303 NA 3.5152524 4
## 304 304 NA 3.7308093 4
## 305 305 3.9485961 3.3284119 4
## 306 306 3.6326828 4.0357080 4
## 307 307 4.0643474 3.9678727 4
## 308 308 NA 4.1642610 4
## 309 309 4.2834901 4.1102333 4
## 310 310 3.9514273 4.2101962 4
## 311 311 NA 3.9975055 4
## 312 312 3.7704318 3.9629276 4
## 313 313 3.6205448 4.0448399 4
## 314 314 4.0025090 3.9998914 4
## 315 315 3.6621169 3.9393469 4
## 316 316 NA 4.2881091 4
## 317 317 4.1279586 4.2358091 4
## 318 318 3.8563616 3.9070165 4
## 319 319 NA 4.0960128 4
## 320 320 4.1732335 4.2190993 4
## 321 321 4.2129027 4.2557813 4
## 322 322 4.5449944 4.1437936 4
## 323 323 NA 4.5588799 4
## 324 324 4.2809168 3.8955731 4
## 325 325 3.5159098 4.1164808 4
## 326 326 4.0959180 3.7298479 4
## 327 327 3.8211777 4.2507414 4
## 328 328 3.8113820 4.4978229 4
## 329 329 4.5300123 3.9777361 4
## 330 330 4.1847565 3.9363343 4
## 331 331 3.9374313 3.4318420 4
## 332 332 4.6762341 4.0253805 4
## 333 333 3.3632409 4.2158958 4
## 334 334 3.8811024 3.3110981 4
## 335 335 NA 3.8583800 4
## 336 336 4.3320430 3.6152731 4
## 337 337 4.3603633 4.2065882 4
## 338 338 4.2168854 4.1724254 4
## 339 339 4.2827776 4.0712652 4
## 340 340 3.4197209 3.9426643 4
## 341 341 3.5634185 3.4434580 4
## 342 342 4.2604882 4.1764107 4
## 343 343 4.6134920 4.0624458 4
## 344 344 NA 4.0790578 4
## 345 345 3.7772521 4.3783260 4
## 346 346 3.6464681 3.6843470 4
## 347 347 4.5281357 4.1168485 4
## 348 348 4.2963845 3.6576726 4
## 349 349 3.4156272 4.1575434 4
## 350 350 3.6951108 4.2392037 4
## 351 351 3.4398336 4.2966140 4
## 352 352 3.6976667 4.4101329 4
## 353 353 4.1619451 4.1456427 4
## 354 354 3.9077289 3.9741202 4
## 355 355 NA 4.1272374 4
## 356 356 4.4873152 4.5416221 4
## 357 357 4.3606232 4.2723192 4
## 358 358 3.5460653 4.2232890 4
## 359 359 3.5559428 3.8824824 4
## 360 360 4.1074274 4.6042786 4
## 361 361 3.7278963 4.1619784 4
## 362 362 NA 3.8522400 4
## 363 363 3.7968741 3.4320511 4
## 364 364 3.7079632 4.0477676 4
## 365 365 3.9656941 4.0240688 4
## 366 366 4.4880741 3.8327801 4
## 367 367 4.1267916 4.0539403 4
## 368 368 NA 3.9493458 4
## 369 369 3.7213180 3.6928604 4
## 370 370 NA 4.6851062 4
## 371 371 4.3557618 3.6822297 4
## 372 372 4.4629404 4.1302436 4
## 373 373 NA 4.3488407 4
## 374 374 NA 3.9189693 4
## 375 375 4.3239639 3.4663861 4
## 376 376 NA 3.8119997 4
## 377 377 NA 4.1857216 4
## 378 378 3.8532408 4.0260158 4
## 379 379 NA 4.0729654 4
## 380 380 3.5028944 3.8320522 4
## 381 381 3.7593790 3.9948957 4
## 382 382 NA 4.1757377 4
## 383 383 3.5453701 4.1819934 4
## 384 384 3.8341699 4.1337212 4
## 385 385 3.5454753 3.9049875 4
## 386 386 3.9949851 4.3941884 4
## 387 387 3.6725118 4.2897380 4
## 388 388 3.9756914 4.0404751 4
## 389 389 4.1410160 3.9063258 4
## 390 390 4.3205476 3.5994082 4
## 391 391 4.3804207 3.7896964 4
## 392 392 3.9929013 3.9956640 4
## 393 393 NA 4.1960163 4
## 394 394 3.9584686 3.7715580 4
## 395 395 NA 4.8471691 4
## 396 396 4.3698917 3.9677632 4
## 397 397 NA 4.1926265 4
## 398 398 3.9928729 4.1775785 4
## 399 399 3.9432750 4.1233490 4
## 400 400 4.4524295 3.7825198 4
## 401 401 NA 5.0450294 5
## 402 402 5.1019782 4.8001901 5
## 403 403 5.1172691 4.9525798 5
## 404 404 5.1910605 4.4270148 5
## 405 405 5.2762189 4.7392097 5
## 406 406 5.7538329 5.1287255 5
## 407 407 5.3225482 4.4610521 5
## 408 408 4.5381460 4.6968687 5
## 409 409 4.5936500 4.9350762 5
## 410 410 NA 4.7935962 5
## 411 411 4.8943111 4.6335769 5
## 412 412 NA 4.5135088 5
## 413 413 5.7292191 5.0221079 5
## 414 414 4.9176475 4.8675722 5
## 415 415 5.1472754 5.2588447 5
## 416 416 4.6608851 4.8356509 5
## 417 417 5.4573166 5.1031367 5
## 418 418 NA 5.0988470 5
## 419 419 NA 5.1269276 5
## 420 420 4.6896242 5.0606529 5
## 421 421 4.4276857 4.7738091 5
## 422 422 4.6688578 5.2349823 5
## 423 423 4.6604069 5.2767647 5
## 424 424 NA 4.8734313 5
## 425 425 5.2921353 5.1933868 5
## 426 426 4.8161732 5.1981492 5
## 427 427 NA 5.0642983 5
## 428 428 5.2836220 4.6629048 5
## 429 429 4.8955325 5.6683789 5
## 430 430 NA 4.6593043 5
## 431 431 5.0759803 4.9800395 5
## 432 432 4.9067557 5.0370765 5
## 433 433 4.7416282 4.8916794 5
## 434 434 4.9160477 5.0655779 5
## 435 435 4.7819251 5.0270777 5
## 436 436 5.0567739 5.0870725 5
## 437 437 NA 5.2379301 5
## 438 438 NA 5.0963812 5
## 439 439 5.2079232 5.1998098 5
## 440 440 4.7704942 5.1582392 5
## 441 441 4.6746363 4.7272163 5
## 442 442 4.4621512 4.6310220 5
## 443 443 4.9204140 5.4607818 5
## 444 444 4.5866643 4.9764021 5
## 445 445 NA 5.3497267 5
## 446 446 NA 4.9725658 5
## 447 447 4.5841538 5.3399144 5
## 448 448 4.8168370 5.0250592 5
## 449 449 5.1074989 4.7561170 5
## 450 450 4.4419290 4.8718176 5
## 451 451 5.2725587 5.0118051 5
## 452 452 4.7283077 5.1266616 5
## 453 453 5.4874816 4.9463756 5
## 454 454 5.1043362 5.0618757 5
## 455 455 4.6466943 5.0227658 5
## 456 456 5.1278688 4.9726940 5
## 457 457 NA 4.8624794 5
## 458 458 5.5166335 5.0561581 5
## 459 459 NA 5.1415792 5
## 460 460 5.1769154 5.3066221 5
## 461 461 4.6530496 4.8915139 5
## 462 462 NA 5.0384046 5
## 463 463 4.8735601 4.5343010 5
## 464 464 4.4360070 4.9546956 5
## 465 465 5.1534273 4.9487389 5
## 466 466 4.5508130 4.7018845 5
## 467 467 4.5984322 4.8850922 5
## 468 468 4.7681095 5.2515668 5
## 469 469 NA 4.5093633 5
## 470 470 NA 4.7596517 5
## 471 471 5.3014319 4.9238760 5
## 472 472 4.3784635 5.3527621 5
## 473 473 4.5349217 5.0800590 5
## 474 474 4.5606143 4.4334509 5
## 475 475 NA 5.3025480 5
## 476 476 NA 5.1425728 5
## 477 477 4.9617557 5.5410689 5
## 478 478 NA 4.7492209 5
## 479 479 4.6515451 5.2139247 5
## 480 480 4.9646100 4.9970420 5
## 481 481 5.4265762 4.6943047 5
## 482 482 5.2314653 5.0259304 5
## 483 483 5.5465440 5.3743223 5
## 484 484 NA 4.6135011 5
## 485 485 4.6169163 5.2088242 5
## 486 486 4.9429869 4.8267085 5
## 487 487 NA 4.4610117 5
## 488 488 4.8040523 4.3878327 5
## 489 489 5.3077146 4.9592097 5
## 490 490 5.0213129 4.9233404 5
## 491 491 5.4606565 4.9351379 5
## 492 492 4.9922979 5.5963421 5
## 493 493 4.8246139 5.4308834 5
## 494 494 4.6460268 5.5015898 5
## 495 495 4.9556460 5.4008026 5
## 496 496 NA 5.0929200 5
## 497 497 5.1365860 5.6318518 5
## 498 498 4.9740604 4.9784884 5
## 499 499 4.8770270 4.9628846 5
## 500 500 5.0317630 4.9630071 5
# Cluster the id column plus every V* column into k = 5 groups using the
# Euclidean metric, with progress output switched off.
km <- data %>%
  select(id, starts_with("V")) %>%
  TGL_kmeans_tidy(k = 5, metric = "euclid", verbose = FALSE)

# match_clusters() is an internal tglkmeans helper; the result is used below
# through its `true_clust` and `new_clust` columns (presumably the new labels
# aligned to the true ones -- confirm against the package source).
d <- tglkmeans:::match_clusters(data, km, 5)

# Accuracy: number of rows whose matched label equals the true label, divided
# by the number of rows that received a (non-NA) cluster assignment.
with(d, sum(true_clust == new_clust, na.rm = TRUE) / sum(!is.na(new_clust)))
## [1] 0.96
Plotting the results (without the NAs), we get:
# Scatter plot of V1 against V2: point colour encodes the assigned cluster and
# point shape the true cluster. The rows of km$centers are drawn on top as
# large black "X" marks.
ggplot(
  data = d,
  mapping = aes(
    x = V1,
    y = V2,
    color = factor(new_clust),
    shape = factor(true_clust)
  )
) +
  geom_point() +
  scale_color_discrete(name = "cluster") +
  scale_shape_discrete(name = "true cluster") +
  geom_point(data = km$centers, size = 7, color = "black", shape = "X")
## Warning: Removed 100 rows containing missing values (`geom_point()`).
Let’s move to higher dimensions and more clusters (30 clusters in 300 dimensions, with the same sd of 0.3):
# Replace `data` with a larger simulated set: 30 clusters in 300 dimensions
# (n = 100, sd = 0.3).
data <- simulate_data(nclust = 30, dims = 300, n = 100, sd = 0.3)

# Cluster the id column plus every V* column into k = 30 groups using the
# Euclidean metric, with progress output switched off.
km <- data %>%
  select(id, starts_with("V")) %>%
  TGL_kmeans_tidy(k = 30, metric = "euclid", verbose = FALSE)

# Internal helper; its result exposes `true_clust` and `new_clust` columns.
d <- tglkmeans:::match_clusters(data, km, 30)

# Accuracy: matching labels divided by the number of rows that received a
# (non-NA) cluster assignment.
with(d, sum(true_clust == new_clust, na.rm = TRUE) / sum(!is.na(new_clust)))
## [1] 1
Let’s compare it to R vanilla kmeans:
# Baseline: base R's kmeans() on the same V* feature columns with 30 centers.
km_standard <- kmeans(data %>% select(starts_with("V")), centers = 30)

# Add an id -> cluster table under `clust` so the fit can be handed to
# match_clusters() (presumably the layout that helper expects -- confirm).
# seq_len() is used instead of 1:nrow(data), which would yield c(1, 0) for a
# zero-row input.
km_standard$clust <- tibble(
  id = seq_len(nrow(data)),
  clust = km_standard$cluster
)

d <- tglkmeans:::match_clusters(data, km_standard, 30)

# Accuracy: matching labels divided by the number of rows that received a
# (non-NA) cluster assignment.
sum(d$true_clust == d$new_clust, na.rm = TRUE) / sum(!is.na(d$new_clust))
## [1] 0.6
We can see that kmeans++ clusters significantly better than R vanilla kmeans.
We can set the seed for the C++ random number generator to get reproducible results:
# Run the same clustering twice with an identical seed (60427); everything
# else matches the k = 30 Euclidean run on the id and V* columns.
km1 <- data %>%
  select(id, starts_with("V")) %>%
  TGL_kmeans_tidy(k = 30, metric = "euclid", verbose = FALSE, seed = 60427)

km2 <- data %>%
  select(id, starts_with("V")) %>%
  TGL_kmeans_tidy(k = 30, metric = "euclid", verbose = FALSE, seed = 60427)

# Exact element-wise comparison of the centers, skipping the first column
# (presumably the cluster identifier -- confirm). Exact equality is intended:
# the check is for bit-for-bit reproducibility, not approximate agreement.
all(km1$centers[, -1] == km2$centers[, -1])
## [1] TRUE