#Yan Wang #08/17/2020 #Lab 02

library(tidyverse)
## -- Attaching packages ------------------------ tidyverse 1.3.0 --
## √ ggplot2 3.3.2     √ purrr   0.3.4
## √ tibble  3.0.3     √ dplyr   1.0.1
## √ tidyr   1.1.1     √ stringr 1.4.0
## √ readr   1.3.1     √ forcats 0.5.0
## -- Conflicts --------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Location of the county-level COVID-19 time series: the raw CSV in the
# nytimes/covid-19-data GitHub repository.
url <- 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'

# Download and parse the CSV; the column types readr chose are echoed below.
covid <- readr::read_csv(file = url)
## Parsed with column specification:
## cols(
##   date = col_date(format = ""),
##   county = col_character(),
##   state = col_character(),
##   fips = col_character(),
##   cases = col_double(),
##   deaths = col_double()
## )
# Show the first six records as a quick sanity check of the parse.
head(covid, n = 6L)
## # A tibble: 6 x 6
##   date       county    state      fips  cases deaths
##   <date>     <chr>     <chr>      <chr> <dbl>  <dbl>
## 1 2020-01-21 Snohomish Washington 53061     1      0
## 2 2020-01-22 Snohomish Washington 53061     1      0
## 3 2020-01-23 Snohomish Washington 53061     1      0
## 4 2020-01-24 Cook      Illinois   17031     1      0
## 5 2020-01-24 Snohomish Washington 53061     1      0
## 6 2020-01-25 Orange    California 06059     1      0

#Question 1

library(tidyverse)
# Latest-day snapshot of every California county, with the change in the
# cumulative case count since that county's previous record as `newCases`.
dat <- covid %>%
  filter(state == "California") %>%
  group_by(county) %>%
  # Order the lag by date explicitly so the difference is always taken against
  # the county's previous record in time, even if the CSV rows are not sorted.
  mutate(newCases = cases - lag(cases, order_by = date)) %>%
  ungroup() %>%
  # Keep only the rows for the most recent date in the California data.
  filter(date == max(date))

# Top five counties by cumulative cases on that latest date.
most_cases <- dat %>%
  slice_max(cases, n = 5) %>%
  select(county, cases)

knitr::kable(most_cases,
             caption = "Most Cases California Counties",
             col.names = c("County", "Cases"))
Most Cases California Counties
County Cases
Los Angeles 253176
Riverside 55073
Orange 51936
San Bernardino 50543
San Diego 42477
# Rank the latest-day county snapshot by its one-day case increase and keep
# the county name next to that increase for the top five.
most_new_cases <- dat %>%
  slice_max(order_by = newCases, n = 5) %>%
  select(all_of(c("county", "newCases")))

knitr::kable(
  most_new_cases,
  caption = "Most New Cases California Counties",
  col.names = c("County", "New Cases")
)
Most New Cases California Counties
County New Cases
Los Angeles 1110
San Diego 445
Santa Clara 274
Orange 178
San Joaquin 165
library(readxl)
# Population lookup keyed by FIPS code: only the 2019 estimate and the FIPS
# column are kept from the workbook (its first two rows are skipped on read).
StatePopulationEstimates <- read_excel(
  "~/github/geog-176A-labs/data/PopulationEstimates.xls",
  skip = 2
) %>%
  select(pop19 = POP_ESTIMATE_2019, fips = FIPStxt)

# Attach the population to every COVID record that has a matching FIPS code.
covid_population <- covid %>%
  inner_join(StatePopulationEstimates, by = "fips")

# California records for the latest date in the joined data, ranked by
# cumulative cases per resident; only the first five rows are kept.
most_cases_percapita <- covid_population %>%
  filter(date == max(date), state == "California") %>%
  mutate(casesPerCapita = cases / pop19) %>%
  arrange(desc(casesPerCapita)) %>%
  slice_head(n = 5)

knitr::kable(
  most_cases_percapita,
  caption = "Most Cumulative Cases Per Capita",
  col.names = c(
    "Date", "County", "State", "FIPS",
    "Cases", "Deaths", "Population", "Cases Per Capita"
  )
)
Most Cumulative Cases Per Capita
Date County State FIPS Cases Deaths Population Cases Per Capita
2020-09-12 Imperial California 06025 11274 307 181215 0.0622134
2020-09-12 Kings California 06031 7057 77 152940 0.0461423
2020-09-12 Kern California 06029 30622 326 900202 0.0340168
2020-09-12 Tulare California 06107 15114 247 466195 0.0324199
2020-09-12 Merced California 06047 8541 127 277680 0.0307584

##Question1(10) ### (1) Describe the total number of cases

library(tidyverse)
# Same NYT county-level CSV as at the top of the file; it is fetched again
# here, so `covid` is replaced by a fresh download.
url <- 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
covid <- readr::read_csv(file = url)
## Parsed with column specification:
## cols(
##   date = col_date(format = ""),
##   county = col_character(),
##   state = col_character(),
##   fips = col_character(),
##   cases = col_double(),
##   deaths = col_double()
## )
# Preview the first six rows of the reloaded table.
head(covid, n = 6L)
## # A tibble: 6 x 6
##   date       county    state      fips  cases deaths
##   <date>     <chr>     <chr>      <chr> <dbl>  <dbl>
## 1 2020-01-21 Snohomish Washington 53061     1      0
## 2 2020-01-22 Snohomish Washington 53061     1      0
## 3 2020-01-23 Snohomish Washington 53061     1      0
## 4 2020-01-24 Cook      Illinois   17031     1      0
## 5 2020-01-24 Snohomish Washington 53061     1      0
## 6 2020-01-25 Orange    California 06059     1      0
# Latest-day snapshot of every California county, with the change in the
# cumulative case count since that county's previous record as `newCases`.
dat <- covid %>%
  filter(state == "California") %>%
  group_by(county) %>%
  # Order the lag by date explicitly so the difference does not depend on the
  # row order of the downloaded CSV.
  mutate(newCases = cases - lag(cases, order_by = date)) %>%
  ungroup() %>%
  # Keep only the rows for the most recent date in the California data.
  filter(date == max(date))
library(tidyverse)
# Statewide cumulative case total: add up `cases` over all rows of `dat` that
# fall on its most recent date. A single summarise is enough; summing per
# county first and then summing those sums gives the same number. NA counts are
# ignored. The outer parentheses print the value as well as storing it.
(total_state_cases <- dat %>%
    filter(date == max(date)) %>%
    summarise(cases = sum(cases, na.rm = TRUE)) %>%
    pull(cases))
## `summarise()` ungrouping output (override with `.groups` argument)
## [1] 760581

##Question1(10) ### (2) Describe the total number of new cases

library(tidyverse)
# Statewide total of new cases: add up `newCases` over all rows of `dat` that
# fall on its most recent date. A single summarise is enough; summing per
# county first and then summing those sums gives the same number. NA values
# (a county's first record has no previous day) are ignored. The outer
# parentheses print the value as well as storing it.
(total_state_newCases <- dat %>%
    filter(date == max(date)) %>%
    summarise(newCases = sum(newCases, na.rm = TRUE)) %>%
    pull(newCases))
## `summarise()` ungrouping output (override with `.groups` argument)
## [1] 3456

##Question1(10) ### (3) Describe the total number of safe counties

library(readxl)
library(tidyverse)
# Read the population workbook; its first two rows are skipped.
pop <- read_excel("../data/PopulationEstimates.xls", skip = 2)

# 2019 population for each California county, keyed by FIPS code.
(pop3 <- pop %>%
    # The workbook stores the statewide total as its own "CA" row (FIPS
    # "06000"); drop it so the table holds counties only.
    filter(State == "CA", FIPStxt != "06000") %>%
    select(pop19 = POP_ESTIMATE_2019, state = State, county = Area_Name, fips = FIPStxt) %>%
    group_by(county) %>%
    slice_max(pop19, n = 1) %>%
    # Remove the grouping so it does not leak into later joins and filters.
    ungroup())
## # A tibble: 59 x 4
## # Groups:   county [59]
##       pop19 state county              fips 
##       <dbl> <chr> <chr>               <chr>
##  1  1671329 CA    Alameda County      06001
##  2     1129 CA    Alpine County       06003
##  3    39752 CA    Amador County       06005
##  4   219186 CA    Butte County        06007
##  5    45905 CA    Calaveras County    06009
##  6 39512223 CA    California          06000
##  7    21547 CA    Colusa County       06011
##  8  1153526 CA    Contra Costa County 06013
##  9    27812 CA    Del Norte County    06015
## 10   192843 CA    El Dorado County    06017
## # ... with 49 more rows
# Full California county time series with the change in the cumulative case
# count since each county's previous record as `newCases` (NA for a county's
# first record). The outer parentheses print the table as well as storing it.
(dat2 <- covid %>%
    filter(state == "California") %>%
    group_by(county) %>%
    # Order the lag by date explicitly so the difference does not depend on
    # the row order of the downloaded CSV.
    mutate(newCases = cases - lag(cases, order_by = date)) %>%
    ungroup())
## # A tibble: 10,421 x 7
##    date       county      state      fips  cases deaths newCases
##    <date>     <chr>       <chr>      <chr> <dbl>  <dbl>    <dbl>
##  1 2020-01-25 Orange      California 06059     1      0       NA
##  2 2020-01-26 Los Angeles California 06037     1      0       NA
##  3 2020-01-26 Orange      California 06059     1      0        0
##  4 2020-01-27 Los Angeles California 06037     1      0        0
##  5 2020-01-27 Orange      California 06059     1      0        0
##  6 2020-01-28 Los Angeles California 06037     1      0        0
##  7 2020-01-28 Orange      California 06059     1      0        0
##  8 2020-01-29 Los Angeles California 06037     1      0        0
##  9 2020-01-29 Orange      California 06059     1      0        0
## 10 2020-01-30 Los Angeles California 06037     1      0        0
## # ... with 10,411 more rows
# Attach the 2019 population to every California county-day record. All rows
# of `dat2` are kept; population columns are NA where a FIPS code has no match.
pop_dat2 <- right_join(pop3, dat2, by = "fips")

# New cases per 100,000 residents for each county over the most recent 14 days
# of data. The outer parentheses print the table as well as storing it.
(last14Days <- pop_dat2 %>%
    # Clear any grouping inherited from `pop3` so that max(date) is the latest
    # date in the whole table rather than a per-group value.
    ungroup() %>%
    # max(date) - 13 through max(date) inclusive is exactly 14 days. Do not add
    # `date < max(date)`: that would drop the newest day and leave only 13.
    filter(date > max(date) - 14) %>%
    # Both joined tables carry a `county` column; keep the COVID table's name.
    select(county = county.y, newCases, pop19, date) %>%
    group_by(county, pop19) %>%
    summarise(newCases = sum(newCases, na.rm = TRUE), .groups = "drop") %>%
    mutate(newCases_percapita = newCases / (pop19 / 100000)))
## `summarise()` regrouping output by 'county' (override with `.groups` argument)
## # A tibble: 58 x 4
##    county         pop19 newCases newCases_percapita
##    <chr>          <dbl>    <dbl>              <dbl>
##  1 Alameda      1671329     1972              118. 
##  2 Alpine          1129        0                0  
##  3 Amador         39752       43              108. 
##  4 Butte         219186      704              321. 
##  5 Calaveras      45905       62              135. 
##  6 Colusa         21547       48              223. 
##  7 Contra Costa 1153526     1358              118. 
##  8 Del Norte      27812        7               25.2
##  9 El Dorado     192843       81               42.0
## 10 Fresno        999101     2171              217. 
## # ... with 48 more rows
library(tidyverse)
# Names of the counties in `last14Days` whose new-case rate is below 100 per
# 100,000 residents.
safe_counties <- last14Days %>%
  filter(newCases_percapita < 100) %>%
  pull(var = county)
# Echo the vector of county names.
safe_counties
##  [1] "Alpine"        "Del Norte"     "El Dorado"     "Humboldt"     
##  [5] "Inyo"          "Lake"          "Lassen"        "Mariposa"     
##  [9] "Mono"          "Napa"          "Nevada"        "Placer"       
## [13] "Plumas"        "San Francisco" "Shasta"        "Sierra"       
## [17] "Siskiyou"      "Solano"        "Tehama"        "Trinity"      
## [21] "Tuolumne"

As of September 12, 2020 (the latest date in the data shown above), California had a total of 760581 cases and 3456 new cases, and 21 counties in California were safe.

#Question 2

library(ggthemes)
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(tidyverse)
# Daily new cases with a 7-day rolling mean for four states, one facet per
# state.
covid %>%
  filter(state %in% c("New York", "California", "Louisiana", "Florida")) %>%
  group_by(state, date) %>%
  # Collapse the county rows into one case total per state and day.
  summarise(cases = sum(cases)) %>%
  # summarise() drops the `date` grouping level, so the result is still grouped
  # by state and the lag / rolling window below stay within a single state.
  mutate(
    newCases = cases - lag(cases),
    roll7 = zoo::rollmean(newCases, k = 7, fill = NA, align = "right")
  ) %>%
  ggplot(mapping = aes(x = date)) +
  geom_col(mapping = aes(y = newCases), fill = "#F5B8B5") +
  geom_line(mapping = aes(y = roll7), col = "darkred", size = 1) +
  facet_wrap(~state, scales = "free_y") +
  labs(
    title = "New Cases: States",
    subtitle = "COVID-19 Data: NY-Times",
    x = "Date",
    y = "Daily New Cases Count",
    caption = "Geog 176A-Lab 02",
    color = ""
  ) +
  theme(
    plot.background = element_rect(fill = "white"),
    panel.background = element_rect(fill = "white"),
    plot.title = element_text(size = 15, face = "bold"),
    aspect.ratio = .5
  )
## `summarise()` regrouping output by 'state' (override with `.groups` argument)
## Warning: Removed 4 rows containing missing values (position_stack).
## Warning: Removed 7 row(s) containing missing values (geom_path).

library(tidyverse)
# Per-state daily series for the four comparison states: the state-wide case
# total, its day-over-day change, and the 7-day right-aligned rolling mean of
# that change. Note that this replaces any earlier `dat2`.
dat2 <- covid %>%
  filter(state %in% c("New York", "California", "Louisiana", "Florida")) %>%
  group_by(state, date) %>%
  summarise(cases = sum(cases)) %>%
  # The summarised table is still grouped by state, so both columns below are
  # computed within each state separately.
  mutate(
    NewDailyCases = cases - lag(cases),
    SevenDayAvg = zoo::rollmean(NewDailyCases, k = 7, fill = NA, align = "right")
  )
## `summarise()` regrouping output by 'state' (override with `.groups` argument)
# 2019 population lookup: for every state abbreviation keep the row with the
# largest estimate (presumably the statewide total row, whose Area_Name is the
# state's full name; confirm against the workbook).
StatePopulationEstimates1 <- read_excel(
  "~/github/geog-176A-labs/data/PopulationEstimates.xls",
  skip = 2
) %>%
  select(pop19 = POP_ESTIMATE_2019, state_abbr = State, state = Area_Name) %>%
  group_by(state_abbr) %>%
  slice_max(order_by = pop19, n = 1)

# Attach the population to the per-state daily series by full state name.
newjoineddata <- dat2 %>%
  inner_join(StatePopulationEstimates1, by = "state")

# Daily new cases per resident and their 7-day right-aligned rolling mean,
# computed within each state.
percapdata <- newjoineddata %>%
  group_by(state) %>%
  mutate(
    NewCasesPerCap = NewDailyCases / pop19,
    NewSevenDayAvg = zoo::rollmean(
      NewCasesPerCap,
      k = 7, fill = NA, align = "right"
    )
  )
# Daily new cases per resident with a 7-day rolling mean, one facet per state.
percapdata %>%
  ggplot(aes(x = date)) +
  # `fill` (not `col`) colours the bars themselves, matching the raw-count
  # chart; `col` would only tint the outlines.
  geom_col(aes(y = NewCasesPerCap), fill = "#F5B8B5") +
  geom_line(aes(y = NewSevenDayAvg), col = "darkred", size = 1) +
  labs(title = "New Cases Per Capita: States",
       x = "Date",
       y = "Newcases",
       caption = "Geog 176A-Lab 02",
       subtitle = "COVID-19 Data: NY-Times",
       color = "") +
  # The trailing `+` is required: without it the theme() calls below are a
  # separate statement that only prints a theme object and never styles the
  # plot.
  facet_wrap(~state, scales = "free_y") +
  theme(plot.background = element_rect(fill = "white"),
        panel.background = element_rect(fill = "white"),
        plot.title = element_text(size = 15, face = 'bold')) +
  theme(aspect.ratio = .5)
## Warning: Removed 4 rows containing missing values (position_stack).
## Warning: Removed 7 row(s) containing missing values (geom_path).
## List of 4
##  $ panel.background:List of 5
##   ..$ fill         : chr "white"
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ plot.background :List of 5
##   ..$ fill         : chr "white"
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ plot.title      :List of 11
##   ..$ family       : NULL
##   ..$ face         : chr "bold"
##   ..$ colour       : NULL
##   ..$ size         : num 15
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ aspect.ratio    : num 0.5
##  - attr(*, "class")= chr [1:2] "theme" "gg"
##  - attr(*, "complete")= logi FALSE
##  - attr(*, "validate")= logi TRUE

Scaling by population makes the states directly comparable. Raw counts mostly reflect how many people live in a state, so a state with a small population and a small absolute number of confirmed cases (e.g., Louisiana) can look less affected than it really is. The ratio of cases to population gives an objective comparison between places: it compares the relative, rather than the absolute, number of cases in each state.

#Question3

library(readr)
# Load the county centroid table (FIPS code, county name, state name and the
# LON / LAT columns). The file's first column has no header, so readr names
# it X1, as the warning below reports.
county_centroids <- readr::read_csv(file = "../data/county-centroids.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   fips = col_character(),
##   name = col_character(),
##   state_name = col_character(),
##   LON = col_double(),
##   LAT = col_double()
## )
# Preview the first six centroids.
head(county_centroids, n = 6L)
## # A tibble: 6 x 6
##      X1 fips  name       state_name     LON   LAT
##   <dbl> <chr> <chr>      <chr>        <dbl> <dbl>
## 1     1 39131 Pike       Ohio         -83.1  39.1
## 2     2 46003 Aurora     South Dakota -98.6  43.7
## 3     3 55035 Eau Claire Wisconsin    -91.3  44.7
## 4     4 72145 Vega Baja  Puerto Rico  -66.4  18.4
## 5     5 48259 Kendall    Texas        -98.7  29.9
## 6     6 40015 Caddo      Oklahoma     -98.4  35.2
library(tidyverse)
library(ggplot2)
library(ggthemes)
# Centroid lookup reduced to the join keys (county name, FIPS) and coordinates.
county2 <- select(county_centroids, county = name, LON, LAT, fips)

# Keep only COVID records whose county name AND FIPS code both match a
# centroid row.
covid_xy <- covid %>%
  inner_join(county2, by = c("county", "fips"))

# Case-weighted mean centre per day: weight each county centroid by its case
# count, add up the weighted coordinates and the weights by date, then divide.
# `month` is the two-digit month of the date and carries no year.
xy1 <- covid_xy %>%
  mutate(
    xcoord = cases * LON,
    ycoord = cases * LAT
  ) %>%
  group_by(date) %>%
  summarise(across(c(cases, xcoord, ycoord), sum)) %>%
  mutate(
    longitude = xcoord / cases,
    latitude = ycoord / cases,
    month = format(date, "%m")
  )
## `summarise()` ungrouping output (override with `.groups` argument)
# New cases per month. `cases` in `xy1` is a running total per day (elsewhere
# in this file new cases are always derived as `cases - lag(cases)`), so
# summing it directly would count every earlier case again on each later day.
# Convert it to a daily increment first, then add the increments by month.
xy2 <- xy1 %>%
  # The first day has no predecessor; default = 0 makes its increment equal to
  # its own total.
  mutate(new_cases = cases - lag(cases, default = 0, order_by = date)) %>%
  group_by(month) %>%
  summarise(mocases = sum(new_cases), .groups = "drop")
## `summarise()` ungrouping output (override with `.groups` argument)
# Daily weighted-mean coordinates for the table below. All three columns
# already live in `xy1`; joining the monthly totals first would only add a
# column that is dropped again, so select straight from `xy1`.
xy3 <- xy1 %>%
  select(date, longitude, latitude)
knitr::kable(xy3, caption = "COVID-19 Weighted Mean", col.names = c("Date","Longitude","Latitude"))
COVID-19 Weighted Mean
Date Longitude Latitude
2020-01-21 -121.71707 48.04616
2020-01-22 -121.71707 48.04616
2020-01-23 -121.71707 48.04616
2020-01-24 -104.76683 44.94380
2020-01-25 -109.09942 41.19636
2020-01-26 -111.60366 38.24914
2020-01-27 -111.60366 38.24914
2020-01-28 -111.60366 38.24914
2020-01-29 -111.60366 38.24914
2020-01-30 -107.63915 38.84786
2020-01-31 -109.64744 38.61689
2020-02-01 -104.82632 39.08077
2020-02-02 -109.56226 38.67105
2020-02-03 -109.56226 38.67105
2020-02-04 -109.56226 38.67105
2020-02-05 -107.88345 39.03726
2020-02-06 -107.88345 39.03726
2020-02-07 -107.88345 39.03726
2020-02-08 -107.88345 39.03726
2020-02-09 -107.88345 39.03726
2020-02-10 -108.56428 38.57552
2020-02-11 -108.56428 38.57552
2020-02-12 -107.84690 37.92372
2020-02-13 -107.22516 37.35883
2020-02-14 -107.22516 37.35883
2020-02-15 -107.22516 37.35883
2020-02-16 -107.22516 37.35883
2020-02-17 -102.79544 38.93337
2020-02-18 -102.79544 38.93337
2020-02-19 -102.79544 38.93337
2020-02-20 -103.33011 39.08625
2020-02-21 -103.60991 38.42267
2020-02-22 -103.60991 38.42267
2020-02-23 -103.60991 38.42267
2020-02-24 -104.87364 38.07401
2020-02-25 -104.83642 38.20319
2020-02-26 -109.12713 38.23063
2020-02-27 -109.12713 38.23063
2020-02-28 -109.76143 38.48641
2020-02-29 -110.13587 38.90233
2020-03-01 -111.28243 39.34318
2020-03-02 -110.84800 39.73677
2020-03-03 -111.41056 39.99539
2020-03-04 -110.43084 40.45149
2020-03-05 -109.57127 40.88431
2020-03-06 -105.91102 40.51154
2020-03-07 -103.26843 40.63572
2020-03-08 -102.28225 40.83108
2020-03-09 -101.84456 40.73198
2020-03-10 -100.96236 41.21422
2020-03-11 -100.79788 41.16062
2020-03-12 -100.19289 41.04925
2020-03-13 -99.58952 40.67829
2020-03-14 -98.84527 40.52105
2020-03-15 -97.91477 40.13559
2020-03-16 -97.50178 40.02873
2020-03-17 -96.01656 39.82074
2020-03-18 -94.34913 39.50682
2020-03-19 -92.33915 39.46876
2020-03-20 -90.94669 39.29020
2020-03-21 -89.72008 39.24798
2020-03-22 -88.34892 39.32213
2020-03-23 -87.50645 39.33203
2020-03-24 -86.99549 39.31713
2020-03-25 -86.55382 39.20133
2020-03-26 -86.41556 39.17700
2020-03-27 -86.25009 39.14841
2020-03-28 -85.88313 39.11974
2020-03-29 -85.65266 39.12286
2020-03-30 -85.55488 39.09877
2020-03-31 -85.34786 38.97939
2020-04-01 -85.31978 38.94598
2020-04-02 -85.27695 38.83296
2020-04-03 -85.06093 38.82488
2020-04-04 -84.80636 38.80190
2020-04-05 -84.80197 38.83043
2020-04-06 -84.64756 38.78922
2020-04-07 -84.53244 38.76139
2020-04-08 -84.41680 38.77909
2020-04-09 -84.32947 38.77330
2020-04-10 -84.22378 38.78812
2020-04-11 -84.14674 38.79967
2020-04-12 -84.08134 38.81974
2020-04-13 -84.05539 38.81248
2020-04-14 -84.01732 38.82617
2020-04-15 -84.00795 38.83153
2020-04-16 -83.92644 38.84950
2020-04-17 -83.90980 38.84142
2020-04-18 -83.91162 38.85681
2020-04-19 -83.85708 38.87202
2020-04-20 -83.90651 38.87152
2020-04-21 -83.95576 38.86410
2020-04-22 -83.97437 38.87452
2020-04-23 -83.99720 38.87166
2020-04-24 -83.98080 38.89894
2020-04-25 -83.95909 38.93252
2020-04-26 -83.95273 38.94297
2020-04-27 -84.00340 38.94399
2020-04-28 -84.03362 38.94572
2020-04-29 -84.11681 38.95063
2020-04-30 -84.16396 38.95718
2020-05-01 -84.22121 38.95421
2020-05-02 -84.26843 38.95446
2020-05-03 -84.32438 38.96485
2020-05-04 -84.37582 38.95973
2020-05-05 -84.44419 38.92172
2020-05-06 -84.53328 38.92182
2020-05-07 -84.58601 38.92061
2020-05-08 -84.67140 38.92201
2020-05-09 -84.74042 38.91410
2020-05-10 -84.76529 38.90989
2020-05-11 -84.80349 38.90960
2020-05-12 -84.87709 38.90173
2020-05-13 -84.94616 38.88774
2020-05-14 -85.00682 38.88050
2020-05-15 -85.08625 38.86982
2020-05-16 -85.13437 38.86690
2020-05-17 -85.16707 38.86239
2020-05-18 -85.20307 38.85868
2020-05-19 -85.25569 38.84749
2020-05-20 -85.30450 38.84315
2020-05-21 -85.36197 38.82275
2020-05-22 -85.42312 38.81585
2020-05-23 -85.48128 38.80660
2020-05-24 -85.52456 38.80460
2020-05-25 -85.56852 38.79158
2020-05-26 -85.64199 38.77474
2020-05-27 -85.70095 38.76076
2020-05-28 -85.75618 38.74666
2020-05-29 -85.83234 38.72761
2020-05-30 -85.91430 38.70781
2020-05-31 -85.98881 38.69112
2020-06-01 -86.01690 38.68471
2020-06-02 -86.09843 38.66545
2020-06-03 -86.16604 38.64309
2020-06-04 -86.22957 38.61457
2020-06-05 -86.33733 38.59848
2020-06-06 -86.40692 38.57111
2020-06-07 -86.48501 38.55056
2020-06-08 -86.54775 38.53332
2020-06-09 -86.62061 38.50641
2020-06-10 -86.70563 38.47271
2020-06-11 -86.79881 38.44074
2020-06-12 -86.89573 38.40253
2020-06-13 -86.97926 38.36053
2020-06-14 -87.03784 38.33068
2020-06-15 -87.11287 38.30151
2020-06-16 -87.22370 38.25373
2020-06-17 -87.33673 38.21032
2020-06-18 -87.44462 38.16938
2020-06-19 -87.56227 38.11467
2020-06-20 -87.67831 38.06004
2020-06-21 -87.78122 38.01913
2020-06-22 -87.91551 37.97090
2020-06-23 -88.06802 37.91299
2020-06-24 -88.18025 37.84783
2020-06-25 -88.29790 37.79360
2020-06-26 -88.41906 37.71825
2020-06-27 -88.51414 37.64474
2020-06-28 -88.62290 37.58025
2020-06-29 -88.73743 37.53063
2020-06-30 -88.89787 37.46082
2020-07-01 -89.04987 37.39379
2020-07-02 -89.17898 37.31934
2020-07-03 -89.30126 37.24508
2020-07-04 -89.40924 37.17542
2020-07-05 -89.49926 37.11915
2020-07-06 -89.60566 37.06909
2020-07-07 -89.74902 37.00895
2020-07-08 -89.86178 36.94521
2020-07-09 -89.96823 36.88564
2020-07-10 -90.06408 36.81952
2020-07-11 -90.14747 36.76352
2020-07-12 -90.20769 36.70237
2020-07-13 -90.28424 36.64737
2020-07-14 -90.39484 36.59461
2020-07-15 -90.47869 36.54428
2020-07-16 -90.57764 36.47711
2020-07-17 -90.66820 36.43112
2020-07-18 -90.73216 36.39086
2020-07-19 -90.78723 36.34311
2020-07-20 -90.85234 36.30849
2020-07-21 -90.93081 36.27228
2020-07-22 -91.01596 36.23502
2020-07-23 -91.08417 36.19882
2020-07-24 -91.13900 36.16627
2020-07-25 -91.19623 36.13196
2020-07-26 -91.22094 36.11037
2020-07-27 -91.26255 36.08913
2020-07-28 -91.30714 36.06176
2020-07-29 -91.36696 36.03990
2020-07-30 -91.40591 36.01135
2020-07-31 -91.44895 35.98784
2020-08-01 -91.47764 35.96886
2020-08-02 -91.50515 35.95004
2020-08-03 -91.54262 35.93613
2020-08-04 -91.56652 35.92108
2020-08-05 -91.59286 35.90945
2020-08-06 -91.62896 35.89599
2020-08-07 -91.65413 35.88185
2020-08-08 -91.67574 35.86832
2020-08-09 -91.70192 35.85686
2020-08-10 -91.76003 35.85094
2020-08-11 -91.81139 35.84033
2020-08-12 -91.83849 35.82543
2020-08-13 -91.87426 35.82050
2020-08-14 -91.91626 35.81128
2020-08-15 -91.94898 35.80285
2020-08-16 -91.98262 35.79809
2020-08-17 -92.01049 35.79691
2020-08-18 -92.02914 35.78813
2020-08-19 -92.04135 35.78776
2020-08-20 -92.06251 35.78593
2020-08-21 -92.08065 35.78239
2020-08-22 -92.09270 35.78073
2020-08-23 -92.10525 35.77966
2020-08-24 -92.12248 35.77972
2020-08-25 -92.14593 35.77989
2020-08-26 -92.15682 35.77987
2020-08-27 -92.16885 35.78465
2020-08-28 -92.18042 35.78736
2020-08-29 -92.18264 35.78903
2020-08-30 -92.19093 35.79042
2020-08-31 -92.20632 35.79503
2020-09-01 -92.20042 35.78902
2020-09-02 -92.24419 35.78253
2020-09-03 -92.25249 35.78691
2020-09-04 -92.25787 35.80027
2020-09-05 -92.26169 35.80377
2020-09-06 -92.26490 35.80668
2020-09-07 -92.26130 35.81101
2020-09-08 -92.26779 35.81404
2020-09-09 -92.27203 35.81653
2020-09-10 -92.27199 35.82164
2020-09-11 -92.26961 35.82512
2020-09-12 -92.26811 35.83135
# Month / monthly-total pairs from `xy2`, rendered as a two-column table.
xy4 <- xy2 %>%
  select(all_of(c("month", "mocases")))
knitr::kable(
  x = xy4,
  caption = "Monthly New Cases",
  col.names = c("Month", "New Cases")
)
Monthly New Cases
Month New Cases
01 41
02 736
03 790711
04 15761769
05 38777213
06 57806205
07 102964255
08 156722282
09 71422826
# Plot the daily case-weighted mean centre on top of a state outline: one
# point per day, coloured by month and sized by that day's summed case count.
ggplot(xy1, mapping = aes(x = longitude, y = latitude)) +
  borders(database = "state", fill = "gray", colour = "white") +
  geom_point(mapping = aes(color = month, size = cases)) +
  labs(
    title = "COVID-19 Weighted Mean",
    subtitle = "COVID-19 Data: NY-Times",
    x = "Longitude",
    y = "Latitude",
    caption = "Geog 176A-Lab 02",
    color = ""
  )

Weight is a relative concept. In a weighted average, each weight reflects the relative importance of its observation in the overall evaluation: it is a quantitative allocation of importance across the different aspects of the object being evaluated, so each factor contributes differently to the overall result. In fact, an evaluation without a focus is not an objective evaluation. Because a weight indicates how important a particular value is within a set of data, the effect of weighting must be studied in combination with specific examples. The weighted average depends not only on each value in the set but also on the weight attached to it: the greater the weight, the greater that value's effect on the average; the smaller the weight, the smaller its effect.