#Yan Wang #08/27/2020 #Lab 04

# SPDS
library(tidyverse)
## -- Attaching packages ----------------------------------- tidyverse 1.3.0 --
## √ ggplot2 3.3.2     √ purrr   0.3.4
## √ tibble  3.0.3     √ dplyr   1.0.1
## √ tidyr   1.1.1     √ stringr 1.4.0
## √ readr   1.3.1     √ forcats 0.5.0
## -- Conflicts -------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(sf)
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
library(units)
## udunits system database from C:/Users/lenovo/Documents/R/win-library/4.0/units/share/udunits
# Data
library(USAboundaries)
library(rnaturalearth)
library(readxl)

# Visualization
library(gghighlight)
library(ggrepel)
library(knitr)
library(rmapshaper)
## Registered S3 method overwritten by 'geojsonlint':
##   method         from 
##   print.location dplyr
library(leaflet)

#Question 1

conus = USAboundaries::us_states() %>% 
  filter(!state_name %in% c("Puerto Rico", "Alaska", "Hawaii")) %>% 
  st_transform(5070)
county_centroid = st_centroid(conus) %>%
  st_combine() %>%
  st_cast("MULTIPOINT")
## Warning in st_centroid.sf(conus): st_centroid assumes attributes are constant
## over geometries of x
v_grid = st_voronoi(county_centroid) %>% 
  st_cast() %>% 
  st_as_sf() %>% 
  mutate(id = 1:n())
t_grid = st_triangulate(county_centroid) %>% 
  st_cast() %>% 
  st_as_sf() %>% 
  mutate(id = 1:n())
sq_grid = st_make_grid(conus, n = c(70, 50)) %>% 
  st_as_sf() %>% 
  mutate(id = 1:n())
hex_grid = st_make_grid(conus, n = c(70, 50), square = FALSE) %>% 
  st_as_sf() %>% 
  mutate(id = 1:n())
simplified = ms_simplify(conus, keep = 0.05)
original_points = mapview::npts(conus)
simplified_points = mapview::npts(simplified)

I remove 10375 points from the original object. This simplification will leads to some loss on details of the border.

plot_tess = function(data, title){
  ggplot() + 
    geom_sf(data = data, fill = "white", col = "black", size = .2) +   
    theme_void() +
    labs(title = title, caption = paste("This tesselation has:", nrow(data), "tiles" )) +
    theme(plot.title = element_text(hjust = .5, color =  "green", face = "bold"))
}

plot_tess(data = conus, "Original Data")

v_grid = st_intersection(v_grid, st_union(conus))
plot_tess(v_grid, "Voroni Coverage") + 
  geom_sf(data = county_centroid, col = "darkred", size = .2)

t_grid = st_intersection(t_grid, st_union(conus))
plot_tess(t_grid, "Triangle Coverage") + 
  geom_sf(data = county_centroid, col = "darkred", size = .3)

plot_tess(sq_grid, "Square Coverage")

plot_tess(hex_grid, "Hexegonal Coverage")

#Question 2

sum_tess = function(data, title) {
  area = st_area(data) %>% 
    units::set_units("km2") %>%
    units::drop_units() 
  
  data_frame(title, nrow(data), mean(area), sd(area), sum(area)) 
}

tess_summary = bind_rows(
  sum_tess(conus, "Original"),
  sum_tess(v_grid, "Voroni"),
  sum_tess(t_grid, "Triangular"),
  sum_tess(sq_grid, "Square"),
  sum_tess(hex_grid, "Hexagonal"))

knitr::kable(tess_summary, caption = "Summarizing Tessellated Surfaces", col.names = c("Tesselation","Features","Mean Area (km2)","Standard Deviation ","Total Area (km2)"), format.args = list(big.mark = ",", scientific = F))
Summarizing Tessellated Surfaces
Tesselation Features Mean Area (km2) Standard Deviation Total Area (km2)
Original 49 159,950.664 123,638.8 7,837,583
Voroni 49 159,950.664 102,893.8 7,837,583
Triangular 88 71,120.044 46,611.8 6,258,564
Square 2,242 3,819.376 0.0 8,563,041
Hexagonal 2,271 3,763.052 0.0 8,545,891

#Question 3

NID2019_U <- read_excel("~/github/geog-176A-labs/data/NID2019_U.xlsx") %>% 
  filter(!is.na(LONGITUDE)) %>% 
  filter(!is.na(LATITUDE))
sf_NID2019_U <- NID2019_U%>% 
    st_as_sf(coords = c("LONGITUDE", "LATITUDE"), crs = 4326) %>%
  st_transform(5070)
point_in_polygon3 = function(points, polygon, id){
  st_join(polygon, points) %>%
    st_drop_geometry() %>%
    count(.data[[id]]) %>%
    setNames(c(id, "n")) %>%
    left_join(polygon, by = id) %>%
    st_as_sf()
}
plot_pip = function(data, title){
  ggplot() +
    geom_sf(data = data, aes(fill = log(n)), alpha = .9, size = .2) +
    scale_fill_gradient(low = "white", high = "darkgreen") +
    theme_void() +
    theme(legend.position = 'none',
          plot.title = element_text(face = "bold", color = "darkgreen", hjust = .5, size = 18)) +
    labs(title = title,
         caption = paste0(sum(data$n), "Dams represented"))
}
point_in_polygon3(sf_NID2019_U, conus, "geoid") %>% 
  plot_pip("Dams Per County")

point_in_polygon3(sf_NID2019_U, v_grid, "id") %>% 
  plot_pip("Dams per Voronoi")

point_in_polygon3(sf_NID2019_U, t_grid, "id") %>% 
  plot_pip("Dams per Triangle")

point_in_polygon3(sf_NID2019_U, sq_grid, "id") %>% 
  plot_pip("Dams per Grid")

point_in_polygon3(sf_NID2019_U, hex_grid, "id") %>% 
  plot_pip("Dams per Hexagon")

Different tessellations result in different presentation of the density. The modifiable areal unit problem (MAUP) can essentially affect statistical results when point-based measures of spatial phenomena are aggregated into districts.For example, for the central American, in the original map, area with the lowest density of dams seems to be Texas. In the map with voronoi grids, area with the lowest density of dams seems to move to Oklahoma. In the map with triangle grids, area with the lowest density of dams seems to move to Nebraska.I would like to choose the hexagonal tessellation because it divides subjects into the smallest grid with equal areas, leading to less bias.

#Question 4

Dam purposes: Irrigation, Hydroelectric, Flood Control, Water Supply, and Fish and Wildlife

I_NID2019_U <- sf_NID2019_U %>% 
  filter(grepl("I", sf_NID2019_U$PURPOSES) == TRUE)
point_in_polygon3(I_NID2019_U, hex_grid, "id") %>% 
  plot_pip("Dams Serving for Irrigation") +
  gghighlight(n > (mean(n) + sd(n)))

H_NID2019_U <- sf_NID2019_U %>% 
  filter(grepl("H", sf_NID2019_U$PURPOSES) == TRUE)
point_in_polygon3(H_NID2019_U, hex_grid, "id") %>% 
  plot_pip("Dams Serving for Hydroelectric") +
  gghighlight(n > (mean(n) + sd(n)))

C_NID2019_U <- sf_NID2019_U %>% 
  filter(grepl("C", sf_NID2019_U$PURPOSES) == TRUE)
point_in_polygon3(C_NID2019_U, hex_grid, "id") %>% 
  plot_pip("Dams Serving for Flood Control") +
  gghighlight(n > (mean(n) + sd(n)))

S_NID2019_U <- sf_NID2019_U %>% 
  filter(grepl("S", sf_NID2019_U$PURPOSES) == TRUE)
point_in_polygon3(S_NID2019_U, hex_grid, "id") %>% 
  plot_pip("Dams Serving for Water Supply") +
  gghighlight(n > (mean(n) + sd(n)))

F_NID2019_U <- sf_NID2019_U %>% 
  filter(grepl("F", sf_NID2019_U$PURPOSES) == TRUE)
point_in_polygon3(F_NID2019_U, hex_grid, "id") %>% 
  plot_pip("Dams Serving for Fish and Wildlife") +
  gghighlight(n > (mean(n) + sd(n)))

* Dams serving for irrigation: Center in the northwest and southeast of the United States. The northwest part concentrated at the east of the Rockies lies on the leeward sides of the prevailing westerly winds. Moisture from the Pacific is almost impossible to come by, making it one of the driest parts of the United States, so dams for irrigation are concentrated in this area. The southeast part, near the Gulf Coast, suffers from seasonal drought due to the warm Gulf Stream, so dams used for irrigation are concentrated in this region. * Dams serve for hydroelectric: Center in the western and northeastern coastal areas of the United States. These areas are close to the sea, which is good for generating electricity. * Dams serving for flood control: Center in the central United States. Located in the temperate monsoon climate where rainfall is high and where floods often occur. * Dams serving for water supply: Around large cities, where water demands are high. * Dams serving for fish and wildlife: Close to the inland National Wildlife Refuge where the demand of dams serving for fish and wildlife is high.

#Extra Credit

NID2019_U <- read_excel("~/github/geog-176A-labs/data/NID2019_U.xlsx")

mississippi <- read_sf("~/github/geog-176A-labs/data/MajorRivers") %>% 
  filter(SYSTEM == "Mississippi")
largest_storage = NID2019_U %>% 
  filter(HAZARD == "H") %>% 
  filter(!STATE %in% c("AK", "PR", "HI")) %>% 
  filter(PURPOSES == "C") %>% 
  group_by(STATE) %>% 
  slice_max(NID_STORAGE, n=1)

labels <- largest_storage %>% 
  select(DAM_NAME, NID_STORAGE, PURPOSES, YEAR_COMPLETED)

leaflet() %>% 
  addProviderTiles(providers$CartoDB) %>% 
  addCircleMarkers(data = largest_storage, 
                   color = "red", 
                   fillOpacity = 0.5, 
                   stroke = FALSE, 
                   popup = leafpop::popupTable(labels, feature.id = FALSE, row.numbers = FALSE), 
                   radius = largest_storage$NID_STORAGE/1500000,
  ) %>%
  addPolylines(data = mississippi)