#1. Prepare data newyork<-read.csv("NYC_Free_Public_WiFi_03292017.csv") attach(newyork) newyorkdf<-data.frame(newyork$LAT,newyork$LON)
#2. Determine number of clusters wss <- (nrow(newyorkdf)-1)*sum(apply(newyorkdf,2,var)) for (i in 2:20) wss[i] <- sum(kmeans(newyorkdf, centers=i)$withinss) plot(1:20, wss, type="b", xlab="Number of Clusters", ylab="Within groups sum of squares")
#3. K-Means Cluster Analysis set.seed(20) fit <- kmeans(newyorkdf, 11) # 11 cluster solution # get cluster means aggregate(newyorkdf,by=list(fit$cluster),FUN=mean) # append cluster assignment newyorkdf <- data.frame(newyorkdf, fit$cluster) newyorkdf newyorkdf$fit.cluster <- as.factor(newyorkdf$fit.cluster) library(ggplot2) ggplot(newyorkdf, aes(x=newyork.LON, y=newyork.LAT, color = newyorkdf$fit.cluster)) + geom_point()
# devtools::install_github("zachcp/nycmaps") library(nycmaps) map(database="nyc") #this should also work with ggplot and ggalt nyc <- map_data("nyc") gg <- ggplot() gg <- gg + geom_map( data=nyc, map=nyc, aes(x=long, y=lat, map_id=region)) gg + geom_point(data = newyorkdf, aes(x = newyork.LON, y = newyork.LAT), colour = newyorkdf$fit.cluster, alpha = .5) + ggtitle("New York Public WiFi")
Source: https://habr.com/ru/post/442094/
All Articles