Geography 176A

#Yan Wang #08/17/2020 #Lab 02

library(tidyverse)

## -- Attaching packages ------------------------ tidyverse 1.3.0 --

## √ ggplot2 3.3.2     √ purrr   0.3.4
## √ tibble  3.0.3     √ dplyr   1.0.1
## √ tidyr   1.1.1     √ stringr 1.4.0
## √ readr   1.3.1     √ forcats 0.5.0

## -- Conflicts --------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
covid = read_csv(url)

## Parsed with column specification:
## cols(
##   date = col_date(format = ""),
##   county = col_character(),
##   state = col_character(),
##   fips = col_character(),
##   cases = col_double(),
##   deaths = col_double()
## )

head(covid)

## # A tibble: 6 x 6
##   date       county    state      fips  cases deaths
##   <date>     <chr>     <chr>      <chr> <dbl>  <dbl>
## 1 2020-01-21 Snohomish Washington 53061     1      0
## 2 2020-01-22 Snohomish Washington 53061     1      0
## 3 2020-01-23 Snohomish Washington 53061     1      0
## 4 2020-01-24 Cook      Illinois   17031     1      0
## 5 2020-01-24 Snohomish Washington 53061     1      0
## 6 2020-01-25 Orange    California 06059     1      0

#Question 1

library(tidyverse)
dat <- covid %>%
  filter(state == "California") %>%
  group_by(county) %>%
  mutate(newCases = cases - lag(cases)) %>%
  ungroup() %>%
  filter(date == max(date))

most_cases = dat %>%
  slice_max(cases, n = 5) %>%
  select(county, cases)

knitr::kable(most_cases,
             caption = "Most Cases California Counties",
             col.names = c("County", "Cases"))

Most Cases California Counties
County	Cases
Los Angeles	253176
Riverside	55073
Orange	51936
San Bernardino	50543
San Diego	42477

most_new_cases = dat %>%
  slice_max(newCases, n = 5) %>%
  select(county, newCases)

knitr::kable(most_new_cases,
             caption = "Most New Cases California Counties",
             col.names = c("County", "New Cases"))

Most New Cases California Counties
County	New Cases
Los Angeles	1110
San Diego	445
Santa Clara	274
Orange	178
San Joaquin	165

library(readxl)
StatePopulationEstimates <- read_excel("~/github/geog-176A-labs/data/PopulationEstimates.xls", skip = 2) %>% 
  select(pop19 = POP_ESTIMATE_2019, fips = FIPStxt)
covid_population <- inner_join(covid, StatePopulationEstimates, by = 'fips')

most_cases_percapita <- covid_population %>% 
  filter(date == max(date)) %>% 
  filter(state == 'California') %>% 
  mutate(casesPerCapita = (cases / pop19)) %>% 
  arrange(-casesPerCapita) %>% 
  head(5)
knitr::kable(most_cases_percapita, caption = "Most Cumulative Cases Per Capita", col.names = c('Date', 'County', 'State', 'FIPS', 'Cases', 'Deaths', 'Population', 'Cases Per Capita'))

Most Cumulative Cases Per Capita
Date	County	State	FIPS	Cases	Deaths	Population	Cases Per Capita
2020-09-12	Imperial	California	06025	11274	307	181215	0.0622134
2020-09-12	Kings	California	06031	7057	77	152940	0.0461423
2020-09-12	Kern	California	06029	30622	326	900202	0.0340168
2020-09-12	Tulare	California	06107	15114	247	466195	0.0324199
2020-09-12	Merced	California	06047	8541	127	277680	0.0307584

##Question1(10) ### (1) Describe the total number of cases

library(tidyverse)
url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
covid = read_csv(url)

## Parsed with column specification:
## cols(
##   date = col_date(format = ""),
##   county = col_character(),
##   state = col_character(),
##   fips = col_character(),
##   cases = col_double(),
##   deaths = col_double()
## )

head(covid)

## # A tibble: 6 x 6
##   date       county    state      fips  cases deaths
##   <date>     <chr>     <chr>      <chr> <dbl>  <dbl>
## 1 2020-01-21 Snohomish Washington 53061     1      0
## 2 2020-01-22 Snohomish Washington 53061     1      0
## 3 2020-01-23 Snohomish Washington 53061     1      0
## 4 2020-01-24 Cook      Illinois   17031     1      0
## 5 2020-01-24 Snohomish Washington 53061     1      0
## 6 2020-01-25 Orange    California 06059     1      0

dat = covid %>%
  filter(state == "California") %>%
  group_by(county) %>%
  mutate(newCases = cases - lag(cases)) %>%
  ungroup() %>%
  filter(date == max(date))

library(tidyverse)
(total_state_cases = dat %>%
    filter(date == max(date)) %>%
    group_by(county) %>%
    summarise(cases = sum(cases, na.rm = TRUE)) %>%
    ungroup() %>%
    summarise(cases = sum(cases, na.rm = TRUE)) %>%
    pull(cases))

## `summarise()` ungrouping output (override with `.groups` argument)

## [1] 760581

##Question1(10) ### (2) Describe the total number of new cases

library(tidyverse)
(total_state_newCases = dat %>%
    filter(date == max(date)) %>%
    group_by(county) %>%
    summarise(newCases = sum(newCases, na.rm = TRUE)) %>%
    ungroup() %>%
    summarise(newCases = sum(newCases, na.rm = TRUE)) %>%
    pull(newCases))

## `summarise()` ungrouping output (override with `.groups` argument)

## [1] 3456

##Question1(10) ### (3) Describe the total number of safe counties

library(readxl)
library(tidyverse)
pop <- read_excel("../data/PopulationEstimates.xls", skip = 2)
(pop3 = pop  %>%
    filter(State == "CA") %>%
    select(pop19 = POP_ESTIMATE_2019, state = State, county = Area_Name, fips = FIPStxt) %>%
    group_by(county) %>%
    slice_max(pop19, n=1))

## # A tibble: 59 x 4
## # Groups:   county [59]
##       pop19 state county              fips 
##       <dbl> <chr> <chr>               <chr>
##  1  1671329 CA    Alameda County      06001
##  2     1129 CA    Alpine County       06003
##  3    39752 CA    Amador County       06005
##  4   219186 CA    Butte County        06007
##  5    45905 CA    Calaveras County    06009
##  6 39512223 CA    California          06000
##  7    21547 CA    Colusa County       06011
##  8  1153526 CA    Contra Costa County 06013
##  9    27812 CA    Del Norte County    06015
## 10   192843 CA    El Dorado County    06017
## # ... with 49 more rows

(dat2 = covid %>%
    filter(state == "California") %>%
    group_by(county) %>%
    mutate(newCases = cases - lag(cases)) %>%
    ungroup())

## # A tibble: 10,421 x 7
##    date       county      state      fips  cases deaths newCases
##    <date>     <chr>       <chr>      <chr> <dbl>  <dbl>    <dbl>
##  1 2020-01-25 Orange      California 06059     1      0       NA
##  2 2020-01-26 Los Angeles California 06037     1      0       NA
##  3 2020-01-26 Orange      California 06059     1      0        0
##  4 2020-01-27 Los Angeles California 06037     1      0        0
##  5 2020-01-27 Orange      California 06059     1      0        0
##  6 2020-01-28 Los Angeles California 06037     1      0        0
##  7 2020-01-28 Orange      California 06059     1      0        0
##  8 2020-01-29 Los Angeles California 06037     1      0        0
##  9 2020-01-29 Orange      California 06059     1      0        0
## 10 2020-01-30 Los Angeles California 06037     1      0        0
## # ... with 10,411 more rows

pop_dat2 = right_join(pop3, dat2, by = "fips")

(last14Days = pop_dat2 %>%
    filter(date > max(date) - 14, date < max(date)) %>%
    select(county = county.y, newCases, pop19, date) %>%
    group_by(county, pop19) %>%
    summarise(newCases = sum(newCases, na.rm = TRUE)) %>%
    ungroup() %>%
    mutate(newCases_percapita = (newCases/(pop19/100000))))

## `summarise()` regrouping output by 'county' (override with `.groups` argument)

## # A tibble: 58 x 4
##    county         pop19 newCases newCases_percapita
##    <chr>          <dbl>    <dbl>              <dbl>
##  1 Alameda      1671329     1972              118. 
##  2 Alpine          1129        0                0  
##  3 Amador         39752       43              108. 
##  4 Butte         219186      704              321. 
##  5 Calaveras      45905       62              135. 
##  6 Colusa         21547       48              223. 
##  7 Contra Costa 1153526     1358              118. 
##  8 Del Norte      27812        7               25.2
##  9 El Dorado     192843       81               42.0
## 10 Fresno        999101     2171              217. 
## # ... with 48 more rows

library(tidyverse)
(safe_counties = last14Days %>%
    filter(newCases_percapita < 100) %>%
    pull(county))

##  [1] "Alpine"        "Del Norte"     "El Dorado"     "Humboldt"     
##  [5] "Inyo"          "Lake"          "Lassen"        "Mariposa"     
##  [9] "Mono"          "Napa"          "Nevada"        "Placer"       
## [13] "Plumas"        "San Francisco" "Shasta"        "Sierra"       
## [17] "Siskiyou"      "Solano"        "Tehama"        "Trinity"      
## [21] "Tuolumne"

As of August 17, 2020, there are a total of 628508 cases, 6527 new cases within the state of California, and 13 counties in California are safe.

#Question 2

library(ggthemes)
library(zoo)

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(tidyverse)
covid %>%
  filter(state %in% c('New York', 'California', 'Louisiana', 'Florida')) %>%
  group_by(state, date) %>%
  summarize(cases = sum(cases)) %>%
  ungroup(state, date) %>%
  group_by(state) %>%
  mutate(newCases = cases - lag(cases)) %>%
  mutate(roll7 = rollmean(newCases, 7, fill = NA, align = "right")) %>%
ggplot(aes(x = date)) +
  geom_col(aes(y = newCases), fill = "#F5B8B5") +
  geom_line(aes(y = roll7), col = "darkred", size = 1) +
  labs(title = "New Cases: States",
       x = 'Date',
       y = "Daily New Cases Count",
       caption = "Geog 176A-Lab 02",
       subtitle = "COVID-19 Data: NY-Times",
       color = "") +
  facet_wrap(~state, scales = "free_y") +
  theme(plot.background = element_rect(fill = "white"),
        panel.background = element_rect(fill = "white"),
        plot.title = element_text(size = 15, face = 'bold')) +
  theme(aspect.ratio = .5)

## `summarise()` regrouping output by 'state' (override with `.groups` argument)

## Warning: Removed 4 rows containing missing values (position_stack).

## Warning: Removed 7 row(s) containing missing values (geom_path).

library(tidyverse)
dat2 = covid%>%
  filter(state %in% c('New York', 'California', 'Louisiana', 'Florida')) %>%
  group_by(state, date) %>%
  summarize(cases = sum(cases)) %>%
  ungroup(state, date) %>%
  group_by(state) %>%
  mutate(NewDailyCases = cases - lag(cases)) %>%
  mutate(SevenDayAvg = rollmean(NewDailyCases, 7, fill = NA, align = "right"))

## `summarise()` regrouping output by 'state' (override with `.groups` argument)

StatePopulationEstimates1 <- read_excel("~/github/geog-176A-labs/data/PopulationEstimates.xls", skip = 2) %>% 
  select(pop19 = POP_ESTIMATE_2019, state_abbr = State, state = Area_Name) %>% 
  group_by(state_abbr) %>% 
  slice_max(pop19, n = 1)
newjoineddata = inner_join(dat2, StatePopulationEstimates1, by = 'state')
percapdata <- newjoineddata %>% 
  group_by(state) %>% 
  mutate(NewCasesPerCap = NewDailyCases / pop19) %>% 
  mutate(NewSevenDayAvg = rollmean(NewCasesPerCap, 7, fill = NA, align = "right")) 
percapdata %>% 
  ggplot(aes(x = date)) +
  geom_col(aes(y = NewCasesPerCap), col = "#F5B8B5") +
  geom_line(aes(y = NewSevenDayAvg), col = "darkred", size = 1) +
  labs(title = "New Cases Per Capita: States",
       x = "Date",
       y = "Newcases",
       caption = "Geog 176A-Lab 02",
       subtitle = "COVID-19 Data: NY-Times",
       color = "") +
  facet_wrap(~state, scales = "free_y")

## Warning: Removed 4 rows containing missing values (position_stack).

## Warning: Removed 7 row(s) containing missing values (geom_path).

  theme(plot.background = element_rect(fill = "white"),
      panel.background = element_rect(fill = "white"),
      plot.title = element_text(size = 15, face = 'bold')) +
  theme(aspect.ratio = .5)

## List of 4
##  $ panel.background:List of 5
##   ..$ fill         : chr "white"
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ plot.background :List of 5
##   ..$ fill         : chr "white"
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ linetype     : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ plot.title      :List of 11
##   ..$ family       : NULL
##   ..$ face         : chr "bold"
##   ..$ colour       : NULL
##   ..$ size         : num 15
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi FALSE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ aspect.ratio    : num 0.5
##  - attr(*, "class")= chr [1:2] "theme" "gg"
##  - attr(*, "complete")= logi FALSE
##  - attr(*, "validate")= logi TRUE

Scaling by population, it data presents the directivity of the data. It becomes better for comparing the data because for some areas with absolute small population and small amount of confirmed cases (i.e. Louisiana), the ration of cases over population introduces a objective comparison between different places.It is now comparing the relative amount among different areas.

#Question3

library(readr)
county_centroids <- read_csv("../data/county-centroids.csv")

## Warning: Missing column names filled in: 'X1' [1]

## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   fips = col_character(),
##   name = col_character(),
##   state_name = col_character(),
##   LON = col_double(),
##   LAT = col_double()
## )

head(county_centroids)

## # A tibble: 6 x 6
##      X1 fips  name       state_name     LON   LAT
##   <dbl> <chr> <chr>      <chr>        <dbl> <dbl>
## 1     1 39131 Pike       Ohio         -83.1  39.1
## 2     2 46003 Aurora     South Dakota -98.6  43.7
## 3     3 55035 Eau Claire Wisconsin    -91.3  44.7
## 4     4 72145 Vega Baja  Puerto Rico  -66.4  18.4
## 5     5 48259 Kendall    Texas        -98.7  29.9
## 6     6 40015 Caddo      Oklahoma     -98.4  35.2

library(tidyverse)
library(ggplot2)
library(ggthemes)
county2 = county_centroids %>% 
  select(county = name, LON, LAT, fips) 
covid_xy = inner_join(covid, county2, by = c("county", "fips"))
xy1 <- covid_xy %>% 
  mutate(xcoord = cases * LON, ycoord = cases * LAT) %>% 
  group_by(date) %>% 
  summarize(cases = sum(cases), xcoord = sum(xcoord), ycoord = sum(ycoord)) %>% 
  mutate(longitude = xcoord / cases, latitude = ycoord / cases) %>% 
  mutate(month = format(date, "%m"))

## `summarise()` ungrouping output (override with `.groups` argument)

xy2 <- xy1 %>% 
  group_by(month) %>% 
  summarise(mocases = sum(cases))

## `summarise()` ungrouping output (override with `.groups` argument)

xy3 <- inner_join(xy1, xy2, by = "month") %>%
  select(date, longitude, latitude) 
knitr::kable(xy3, caption = "COVID-19 Weighted Mean", col.names = c("Date","Longitude","Latitude"))

COVID-19 Weighted Mean
Date	Longitude	Latitude
2020-01-21	-121.71707	48.04616
2020-01-22	-121.71707	48.04616
2020-01-23	-121.71707	48.04616
2020-01-24	-104.76683	44.94380
2020-01-25	-109.09942	41.19636
2020-01-26	-111.60366	38.24914
2020-01-27	-111.60366	38.24914
2020-01-28	-111.60366	38.24914
2020-01-29	-111.60366	38.24914
2020-01-30	-107.63915	38.84786
2020-01-31	-109.64744	38.61689
2020-02-01	-104.82632	39.08077
2020-02-02	-109.56226	38.67105
2020-02-03	-109.56226	38.67105
2020-02-04	-109.56226	38.67105
2020-02-05	-107.88345	39.03726
2020-02-06	-107.88345	39.03726
2020-02-07	-107.88345	39.03726
2020-02-08	-107.88345	39.03726
2020-02-09	-107.88345	39.03726
2020-02-10	-108.56428	38.57552
2020-02-11	-108.56428	38.57552
2020-02-12	-107.84690	37.92372
2020-02-13	-107.22516	37.35883
2020-02-14	-107.22516	37.35883
2020-02-15	-107.22516	37.35883
2020-02-16	-107.22516	37.35883
2020-02-17	-102.79544	38.93337
2020-02-18	-102.79544	38.93337
2020-02-19	-102.79544	38.93337
2020-02-20	-103.33011	39.08625
2020-02-21	-103.60991	38.42267
2020-02-22	-103.60991	38.42267
2020-02-23	-103.60991	38.42267
2020-02-24	-104.87364	38.07401
2020-02-25	-104.83642	38.20319
2020-02-26	-109.12713	38.23063
2020-02-27	-109.12713	38.23063
2020-02-28	-109.76143	38.48641
2020-02-29	-110.13587	38.90233
2020-03-01	-111.28243	39.34318
2020-03-02	-110.84800	39.73677
2020-03-03	-111.41056	39.99539
2020-03-04	-110.43084	40.45149
2020-03-05	-109.57127	40.88431
2020-03-06	-105.91102	40.51154
2020-03-07	-103.26843	40.63572
2020-03-08	-102.28225	40.83108
2020-03-09	-101.84456	40.73198
2020-03-10	-100.96236	41.21422
2020-03-11	-100.79788	41.16062
2020-03-12	-100.19289	41.04925
2020-03-13	-99.58952	40.67829
2020-03-14	-98.84527	40.52105
2020-03-15	-97.91477	40.13559
2020-03-16	-97.50178	40.02873
2020-03-17	-96.01656	39.82074
2020-03-18	-94.34913	39.50682
2020-03-19	-92.33915	39.46876
2020-03-20	-90.94669	39.29020
2020-03-21	-89.72008	39.24798
2020-03-22	-88.34892	39.32213
2020-03-23	-87.50645	39.33203
2020-03-24	-86.99549	39.31713
2020-03-25	-86.55382	39.20133
2020-03-26	-86.41556	39.17700
2020-03-27	-86.25009	39.14841
2020-03-28	-85.88313	39.11974
2020-03-29	-85.65266	39.12286
2020-03-30	-85.55488	39.09877
2020-03-31	-85.34786	38.97939
2020-04-01	-85.31978	38.94598
2020-04-02	-85.27695	38.83296
2020-04-03	-85.06093	38.82488
2020-04-04	-84.80636	38.80190
2020-04-05	-84.80197	38.83043
2020-04-06	-84.64756	38.78922
2020-04-07	-84.53244	38.76139
2020-04-08	-84.41680	38.77909
2020-04-09	-84.32947	38.77330
2020-04-10	-84.22378	38.78812
2020-04-11	-84.14674	38.79967
2020-04-12	-84.08134	38.81974
2020-04-13	-84.05539	38.81248
2020-04-14	-84.01732	38.82617
2020-04-15	-84.00795	38.83153
2020-04-16	-83.92644	38.84950
2020-04-17	-83.90980	38.84142
2020-04-18	-83.91162	38.85681
2020-04-19	-83.85708	38.87202
2020-04-20	-83.90651	38.87152
2020-04-21	-83.95576	38.86410
2020-04-22	-83.97437	38.87452
2020-04-23	-83.99720	38.87166
2020-04-24	-83.98080	38.89894
2020-04-25	-83.95909	38.93252
2020-04-26	-83.95273	38.94297
2020-04-27	-84.00340	38.94399
2020-04-28	-84.03362	38.94572
2020-04-29	-84.11681	38.95063
2020-04-30	-84.16396	38.95718
2020-05-01	-84.22121	38.95421
2020-05-02	-84.26843	38.95446
2020-05-03	-84.32438	38.96485
2020-05-04	-84.37582	38.95973
2020-05-05	-84.44419	38.92172
2020-05-06	-84.53328	38.92182
2020-05-07	-84.58601	38.92061
2020-05-08	-84.67140	38.92201
2020-05-09	-84.74042	38.91410
2020-05-10	-84.76529	38.90989
2020-05-11	-84.80349	38.90960
2020-05-12	-84.87709	38.90173
2020-05-13	-84.94616	38.88774
2020-05-14	-85.00682	38.88050
2020-05-15	-85.08625	38.86982
2020-05-16	-85.13437	38.86690
2020-05-17	-85.16707	38.86239
2020-05-18	-85.20307	38.85868
2020-05-19	-85.25569	38.84749
2020-05-20	-85.30450	38.84315
2020-05-21	-85.36197	38.82275
2020-05-22	-85.42312	38.81585
2020-05-23	-85.48128	38.80660
2020-05-24	-85.52456	38.80460
2020-05-25	-85.56852	38.79158
2020-05-26	-85.64199	38.77474
2020-05-27	-85.70095	38.76076
2020-05-28	-85.75618	38.74666
2020-05-29	-85.83234	38.72761
2020-05-30	-85.91430	38.70781
2020-05-31	-85.98881	38.69112
2020-06-01	-86.01690	38.68471
2020-06-02	-86.09843	38.66545
2020-06-03	-86.16604	38.64309
2020-06-04	-86.22957	38.61457
2020-06-05	-86.33733	38.59848
2020-06-06	-86.40692	38.57111
2020-06-07	-86.48501	38.55056
2020-06-08	-86.54775	38.53332
2020-06-09	-86.62061	38.50641
2020-06-10	-86.70563	38.47271
2020-06-11	-86.79881	38.44074
2020-06-12	-86.89573	38.40253
2020-06-13	-86.97926	38.36053
2020-06-14	-87.03784	38.33068
2020-06-15	-87.11287	38.30151
2020-06-16	-87.22370	38.25373
2020-06-17	-87.33673	38.21032
2020-06-18	-87.44462	38.16938
2020-06-19	-87.56227	38.11467
2020-06-20	-87.67831	38.06004
2020-06-21	-87.78122	38.01913
2020-06-22	-87.91551	37.97090
2020-06-23	-88.06802	37.91299
2020-06-24	-88.18025	37.84783
2020-06-25	-88.29790	37.79360
2020-06-26	-88.41906	37.71825
2020-06-27	-88.51414	37.64474
2020-06-28	-88.62290	37.58025
2020-06-29	-88.73743	37.53063
2020-06-30	-88.89787	37.46082
2020-07-01	-89.04987	37.39379
2020-07-02	-89.17898	37.31934
2020-07-03	-89.30126	37.24508
2020-07-04	-89.40924	37.17542
2020-07-05	-89.49926	37.11915
2020-07-06	-89.60566	37.06909
2020-07-07	-89.74902	37.00895
2020-07-08	-89.86178	36.94521
2020-07-09	-89.96823	36.88564
2020-07-10	-90.06408	36.81952
2020-07-11	-90.14747	36.76352
2020-07-12	-90.20769	36.70237
2020-07-13	-90.28424	36.64737
2020-07-14	-90.39484	36.59461
2020-07-15	-90.47869	36.54428
2020-07-16	-90.57764	36.47711
2020-07-17	-90.66820	36.43112
2020-07-18	-90.73216	36.39086
2020-07-19	-90.78723	36.34311
2020-07-20	-90.85234	36.30849
2020-07-21	-90.93081	36.27228
2020-07-22	-91.01596	36.23502
2020-07-23	-91.08417	36.19882
2020-07-24	-91.13900	36.16627
2020-07-25	-91.19623	36.13196
2020-07-26	-91.22094	36.11037
2020-07-27	-91.26255	36.08913
2020-07-28	-91.30714	36.06176
2020-07-29	-91.36696	36.03990
2020-07-30	-91.40591	36.01135
2020-07-31	-91.44895	35.98784
2020-08-01	-91.47764	35.96886
2020-08-02	-91.50515	35.95004
2020-08-03	-91.54262	35.93613
2020-08-04	-91.56652	35.92108
2020-08-05	-91.59286	35.90945
2020-08-06	-91.62896	35.89599
2020-08-07	-91.65413	35.88185
2020-08-08	-91.67574	35.86832
2020-08-09	-91.70192	35.85686
2020-08-10	-91.76003	35.85094
2020-08-11	-91.81139	35.84033
2020-08-12	-91.83849	35.82543
2020-08-13	-91.87426	35.82050
2020-08-14	-91.91626	35.81128
2020-08-15	-91.94898	35.80285
2020-08-16	-91.98262	35.79809
2020-08-17	-92.01049	35.79691
2020-08-18	-92.02914	35.78813
2020-08-19	-92.04135	35.78776
2020-08-20	-92.06251	35.78593
2020-08-21	-92.08065	35.78239
2020-08-22	-92.09270	35.78073
2020-08-23	-92.10525	35.77966
2020-08-24	-92.12248	35.77972
2020-08-25	-92.14593	35.77989
2020-08-26	-92.15682	35.77987
2020-08-27	-92.16885	35.78465
2020-08-28	-92.18042	35.78736
2020-08-29	-92.18264	35.78903
2020-08-30	-92.19093	35.79042
2020-08-31	-92.20632	35.79503
2020-09-01	-92.20042	35.78902
2020-09-02	-92.24419	35.78253
2020-09-03	-92.25249	35.78691
2020-09-04	-92.25787	35.80027
2020-09-05	-92.26169	35.80377
2020-09-06	-92.26490	35.80668
2020-09-07	-92.26130	35.81101
2020-09-08	-92.26779	35.81404
2020-09-09	-92.27203	35.81653
2020-09-10	-92.27199	35.82164
2020-09-11	-92.26961	35.82512
2020-09-12	-92.26811	35.83135

xy4 <- xy2 %>% 
  select(month, mocases)
knitr::kable(xy4, caption = "Monthly New Cases", col.names = c("Month","New Cases"))

Monthly New Cases
Month	New Cases
01	41
02	736
03	790711
04	15761769
05	38777213
06	57806205
07	102964255
08	156722282
09	71422826

ggplot(data = xy1, aes(x = longitude, y = latitude)) +
  borders("state", fill = "gray", colour = "white") +
  geom_point(aes(color = month, size = cases)) +
  labs(title = "COVID-19 Weighted Mean",
       x = "Longitude",
       y = "Latitude",
       caption = "Geog 176A-Lab 02",
       subtitle = "COVID-19 Data: NY-Times",
       color = "")

Weight is a relative concept. The weight of the weighted average reflects the relative importance in the overall evaluation. The weight indicates that in the evaluation process, it is the quantitative allocation of the importance degree of different aspects of the evaluated object, and the role of each evaluation factor in the overall evaluation is treated differently. In fact, an evaluation without a focus is not an objective evaluation. Weight indicates how important certain data is in a set of data, so the weighted average effect must be studied in combination with specific examples. The size of the weighted average is not only related to each data in a set but is also affected by the weight of each data. The greater the weight.The greater the effect on the average size. The reverse is smaller.

Geography 176A

Lab 02: COVID-19 Pandemic

Yan Wang