Read data
# Download twosigma.csv from GitHub and parse it into a data frame.
twoSig <- read.csv(
  file = "https://raw.githubusercontent.com/minorsmart/finance/master/twosigma.csv"
)
Check structure and quality.
# Print a compact per-column summary (type and first few values) of twoSig.
str(twoSig)
'data.frame': 494 obs. of 14 variables:
$ X : int 1 2 3 4 5 6 7 8 9 10 ...
$ bathrooms : num 1 1 1 1 1 2 1 1 1 1 ...
$ bedrooms : int 2 1 1 1 1 2 1 0 0 0 ...
$ building_id : Factor w/ 354 levels "0","0021440c04241281a436ec21accc40b1",..: 210 1 16 1 278 342 110 1 141 209 ...
$ created : Factor w/ 493 levels "2016-04-02 01:16:08",..: 302 264 376 261 167 190 224 93 27 313 ...
$ description : Factor w/ 460 levels ""," "," ",..: 47 22 114 453 268 393 412 211 386 414 ...
$ display_address: Factor w/ 391 levels ""," 2nd Street",..: 6 98 204 77 229 112 38 85 275 288 ...
$ latitude : num 40.8 40.9 40.8 40.7 40.7 ...
$ listing_id : int 7055664 7022436 7114083 7019894 6942846 6964554 6992264 6885075 6832089 7067184 ...
$ longitude : num -73.9 -73.9 -74 -74 -74 ...
$ manager_id : Factor w/ 346 levels "001ce808ce1720e24a9510e014c69707",..: 296 9 211 314 322 225 314 191 76 148 ...
$ price : int 2800 1700 2300 2500 2704 4295 2150 2050 2175 2450 ...
$ street_address : Factor w/ 460 levels "1 duffield street brooklyn",..: 269 191 194 416 190 258 177 122 96 84 ...
$ interest_level : Factor w/ 3 levels "high","low","medium": 2 2 3 2 2 2 3 2 2 2 ...
# Missing values
# Percentage of missing (NA) entries in each column of twoSig. The is.na()
# mask is deliberately NOT negated: negating it gives the share of values
# that are present, which would draw every complete column at 100% on an
# axis labelled "NAs".
NAtwoSig <- vapply(
  twoSig,
  function(x) 100 * mean(is.na(x)),
  numeric(1),
  USE.NAMES = FALSE
)
# One row per column of twoSig: its name and its percentage of NAs.
NAtwoSigDF <- data.frame(
  Variable = colnames(twoSig),
  NAs = NAtwoSig,
  stringsAsFactors = FALSE
)
library(ggplot2)
# Bar chart of the NA percentage per variable on a fixed 0-100 scale.
# Bars are filled per variable; the legend is hidden because the x axis
# already names every bar.
ggplot(NAtwoSigDF, aes(x = Variable, y = NAs, fill = Variable)) +
  geom_col() +
  scale_y_continuous(limits = c(0, 100)) +
  theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
  theme(legend.position = "none")
What are we looking at?
library(leaflet)
# Map centre: column means of the (longitude, latitude) pairs.
center <- colMeans(cbind(twoSig$longitude, twoSig$latitude))
# Largest price in the data; marker radii are scaled relative to it.
s <- max(twoSig$price)
# Build the map: centre the view, add the default tile layer, then draw one
# circle per row whose radius grows with the square root of price / max price
# (at most 30) and whose popup shows the row's description.
m <- leaflet(width = 900) %>%
  setView(lng = center[1], lat = center[2], zoom = 12) %>%
  addTiles() %>%
  addCircleMarkers(
    lng = twoSig$longitude,
    lat = twoSig$latitude,
    radius = 30 * sqrt(twoSig$price / s),
    color = "royalblue",
    popup = twoSig$description
  )
# Display the map widget.
m
Change the popup text in such a way that it shows the address.