This data is taken from the Two Sigma Kaggle competition and comprises only a small subset of the original dataset.


Read data

# Download the listings sample as a data frame directly from its GitHub URL.
twoSig <- read.csv(
  file = "https://raw.githubusercontent.com/minorsmart/finance/master/twosigma.csv"
)

Check structure and quality.

# Print a compact summary of every column: its type and first few values.
utils::str(object = twoSig)
'data.frame':   494 obs. of  14 variables:
 $ X              : int  1 2 3 4 5 6 7 8 9 10 ...
 $ bathrooms      : num  1 1 1 1 1 2 1 1 1 1 ...
 $ bedrooms       : int  2 1 1 1 1 2 1 0 0 0 ...
 $ building_id    : Factor w/ 354 levels "0","0021440c04241281a436ec21accc40b1",..: 210 1 16 1 278 342 110 1 141 209 ...
 $ created        : Factor w/ 493 levels "2016-04-02 01:16:08",..: 302 264 376 261 167 190 224 93 27 313 ...
 $ description    : Factor w/ 460 levels ""," ","        ",..: 47 22 114 453 268 393 412 211 386 414 ...
 $ display_address: Factor w/ 391 levels ""," 2nd Street",..: 6 98 204 77 229 112 38 85 275 288 ...
 $ latitude       : num  40.8 40.9 40.8 40.7 40.7 ...
 $ listing_id     : int  7055664 7022436 7114083 7019894 6942846 6964554 6992264 6885075 6832089 7067184 ...
 $ longitude      : num  -73.9 -73.9 -74 -74 -74 ...
 $ manager_id     : Factor w/ 346 levels "001ce808ce1720e24a9510e014c69707",..: 296 9 211 314 322 225 314 191 76 148 ...
 $ price          : int  2800 1700 2300 2500 2704 4295 2150 2050 2175 2450 ...
 $ street_address : Factor w/ 460 levels "1 duffield street brooklyn",..: 269 191 194 416 190 258 177 122 96 84 ...
 $ interest_level : Factor w/ 3 levels "high","low","medium": 2 2 3 2 2 2 3 2 2 2 ...
# Missing values
# For every column, compute the percentage of rows that hold a non-NA value
# (100 = column is complete). The column is still called `NAs` for
# compatibility, but note that it stores completeness, not the share of NAs.
# is.na() only detects NA: blank strings such as "" or " " count as present.
NAtwoSig <- as.numeric(
  vapply(
    twoSig,
    function(x) 100 * sum(!is.na(x)) / nrow(twoSig),
    numeric(1)
  )
)
NAtwoSigDF <- data.frame(
  Variable = colnames(twoSig),
  NAs = NAtwoSig,
  stringsAsFactors = FALSE
)
library(ggplot2)
# One bar per variable. Mapping `fill` to the variable name gives each bar its
# own colour; colour strings placed inside aes() would be treated as category
# labels rather than as the colours to draw.
ggplot(NAtwoSigDF, aes(x = Variable, y = NAs, fill = Variable)) +
  geom_col() +
  # Label the axis with what is actually measured, so full bars are not
  # misread as "100% NAs".
  scale_y_continuous(name = "Non-missing values (%)", limits = c(0, 100)) +
  theme(
    axis.text.x = element_text(angle = -90, hjust = 0),
    legend.position = "none"
  )

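Conclusion: no missing (NA) values. Note that blank strings, which occur in description and display_address, are not counted as missing by this check.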

What are we looking at?

library(leaflet)
# Map centre: mean longitude (element 1) and mean latitude (element 2).
center <- colMeans(cbind(twoSig$longitude, twoSig$latitude))
# Highest price in the data; used to normalise the marker sizes.
s <- max(twoSig$price)
# Build the map: one circle per listing, with a radius proportional to the
# square root of its price relative to the maximum (the most expensive
# listing gets radius 30). Clicking a marker shows the listing description.
m <- leaflet(width = 900) %>%
  setView(
    lng = center[1],
    lat = center[2],
    zoom = 12
  ) %>%
  addTiles() %>%
  addCircleMarkers(
    lng = twoSig$longitude,
    lat = twoSig$latitude,
    radius = 30 * sqrt(twoSig$price / s),
    color = "royalblue",
    popup = twoSig$description
  )
# Display the map widget.
m

Question

Change the popup text in such a way that it shows the address.

LS0tCnRpdGxlOiAiVHdvIFNpZ21hIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgo8aHI+CiMjIyMgVGhpcyBkYXRhIGlzIHRha2VuIGZyb20gdGhlIFtUd28gU2lnbWEgS2FnZ2xlXShodHRwczovL3d3dy5rYWdnbGUuY29tL2MvdHdvLXNpZ21hLWZpbmFuY2lhbC1tb2RlbGluZykgY29tcGV0aXRpb24gYW5kIGNvbXByaXNlcyBvbmx5IGEgbWFyZ2luYWwgc3Vic2V0IG9mIHRoZSBvcmlnaW5hbCBzZXQuCgo8aHI+ClJlYWQgZGF0YQpgYGB7ciwgZWNobz1UUlVFLCBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQp0d29TaWcgPC0gcmVhZC5jc3YoImh0dHBzOi8vcmF3LmdpdGh1YnVzZXJjb250ZW50LmNvbS9taW5vcnNtYXJ0L2ZpbmFuY2UvbWFzdGVyL3R3b3NpZ21hLmNzdiIpCmBgYAoKPGhyPgpDaGVjayBzdHJ1Y3R1cmUgYW5kIHF1YWxpdHkuCmBgYHtyfQpzdHIodHdvU2lnKQojIE1pc3NpbmcgdmFsdWVzCk5BdHdvU2lnIDwtIGFzLm51bWVyaWMoc2FwcGx5KHR3b1NpZywgZnVuY3Rpb24oeCkgMTAwKnN1bSghaXMubmEoeCkpL2RpbSh0d29TaWcpWzFdKSkKTkF0d29TaWdERiA8LSBkYXRhLmZyYW1lKFZhcmlhYmxlID0gY29sbmFtZXModHdvU2lnKSwgTkFzID0gTkF0d29TaWcsIHN0cmluZ3NBc0ZhY3RvcnMgPSBGQUxTRSkKCmxpYnJhcnkoZ2dwbG90MikKZ2dwbG90KE5BdHdvU2lnREYsYWVzKHg9VmFyaWFibGUseT1OQXMsZmlsbD1yYWluYm93KGRpbShOQXR3b1NpZ0RGKVsxXSkpKSArCiAgZ2VvbV9jb2woKSArCiAgc2NhbGVfeV9jb250aW51b3VzKGxpbWl0cyA9IGMoMCwgMTAwKSkgKwogIHRoZW1lKGF4aXMudGV4dC54PWVsZW1lbnRfdGV4dChhbmdsZSA9IC05MCwgaGp1c3QgPSAwKSkgKwogIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbj0ibm9uZSIpCmBgYAoKQ29uY2x1c2lvbjogbm8gbWlzc2luZyBkYXRhLgo8aHI+CgpXaGF0IGFyZSB3ZSBsb29raW5nIGF0PwoKCmBgYHtyfQpsaWJyYXJ5KGxlYWZsZXQpCmNlbnRlciA8LSBjYmluZCh0d29TaWckbG9uZ2l0dWRlLCB0d29TaWckbGF0aXR1ZGUpICU+JSBjb2xNZWFucygpCnMgPC0gbWF4KHR3b1NpZyRwcmljZSkKbSA8LSBsZWFmbGV0KHdpZHRoID0gOTAwKSAlPiUKICBzZXRWaWV3KGxuZyA9IGNlbnRlclsxXSwgbGF0ID0gY2VudGVyWzJdLCB6b29tID0gMTIpICU+JQogIGFkZFRpbGVzKCkgJT4lCiAgYWRkQ2lyY2xlTWFya2Vycyh0d29TaWckbG9uZ2l0dWRlLCB0d29TaWckbGF0aXR1ZGUsIHJhZGl1cyA9IDMwKnNxcnQodHdvU2lnJHByaWNlL3MpLCBjb2xvciA9ICJyb3lhbGJsdWUiLCBwb3B1cCA9IHR3b1NpZyRkZXNjcmlwdGlvbikKbQpgYGAKCjxocj4KIyMjIFF1ZXN0aW9uCipDaGFuZ2UgdGhlIHBvcHVwIHRleHQgaW4gc3VjaCBhIHdheSB0aGF0IGlzIHNob3dzIHRoZSBhZGRyZXNzLioK