###########################################################################
## Extract the New York Place Names and their Counties from the
## 2010 Decennial Census master geographic file, New York / PL 94-171
###########################################################################

# install.packages("tidyverse")
# install.packages("dplyr")

library(dplyr)

# setwd("~/Desktop/tidycensus_work/output")
# setwd("~/Desktop/Census/2010_Census/ca2010.sf1")

# Note that the GEO file for both 2010 Decennial SF1 and PL 94-171
# ARE THE SAME FORMAT!
#
# Some place names contain a tilde-n. Read without a declared encoding, R
# cannot slice those lines with substr(), which previously forced a manual
# edit of the text file (tilde-n changed to n). The file is read as Latin-1
# instead, so the original file can be used unmodified.
# NOTE(review): Latin-1 (ISO-8859-1) is the presumed encoding of the Census
# geographic header file -- confirm against the technical documentation.
geo_dir <- "~/Desktop/ny2010.pl"
geo_file <- file.path(geo_dir, "nygeo2010.pl")

# Very large fixed format file, 635,130 observations for New York.
# Each element of x is one fixed-width record.
x <- readLines(con = geo_file, encoding = "latin1")

# Slice the fixed-width records into columns (start/stop character
# positions). Counts and areas are parsed as numbers so that later
# comparisons work on magnitudes instead of collating padded text.
# stringsAsFactors = FALSE keeps the text columns as character on every
# R version (factors were the default before R 4.0).
NYGeo <- data.frame(
  # fileid = substr(x,   1,   6),
  # stusab = substr(x,   7,   8),
  sumlev   = substr(x,   9,  11),
  # geocomp = substr(x, 12,  13),
  state    = substr(x,  28,  29),
  county   = substr(x,  30,  32),
  place    = substr(x,  46,  50),
  # tract  = substr(x,  55,  60),
  # blkgrp = substr(x,  61,  61),
  # block  = substr(x,  62,  65),
  arealand = as.numeric(substr(x, 199, 212)),
  areawatr = as.numeric(substr(x, 213, 226)),
  name     = substr(x, 227, 316),
  pop100   = as.integer(substr(x, 319, 327)),
  hu100    = as.integer(substr(x, 328, 336)),
  stringsAsFactors = FALSE
)

# GEOID = 2-character state code followed by the 5-character place code.
NYGeo$GEOID <- paste0(NYGeo$state, NYGeo$place)

# sumlev is text, so compare against string codes.
sumlev155 <- subset(NYGeo, sumlev == "155") # state-place-county summary level
sumlev160 <- subset(NYGeo, sumlev == "160") # state-place summary level

# Full outer join on state + place. Columns present in both inputs get the
# suffix .x (from sumlev155) or .y (from sumlev160).
coplace1 <- merge(sumlev155, sumlev160, by = c("state", "place"), all = TRUE)

# The name examples below are carried over from the California version of
# this script.
coplace2 <- dplyr::rename(
  coplace1,
  county_name = name.x, # name is eg "Contra Costa County (part)"
  place_name  = name.y, # name is eg "Acalanes Ridge CDP"
  county      = county.x,
  GEOID       = GEOID.x
)

# New York has 1,189 places (SUMLEV=160), and 1,206 place-county parts
# (SUMLEV=155). The extra rows are places that straddle two-or-more counties.
# pop100.x = 2010 population count for, perhaps, part of the place (sumlev=155)
# pop100.y = 2010 population count for the FULL place (sumlev=160)

# Find the New York places straddling two-or-more counties: a place-county
# part whose population is smaller than that of the full place.
# The count and area columns of coplace2 may hold fixed-width text (or
# factors on older R versions), for which `<` collates strings instead of
# comparing magnitudes. Coerce through character to numeric first; this is
# a no-op when the columns are already numeric.
splittown <- subset(
  coplace2,
  as.numeric(as.character(pop100.x)) < as.numeric(as.character(pop100.y))
)
# View() needs an interactive session; skip it in batch runs so the script
# still reaches the end.
if (interactive()) {
  View(splittown)
}

# This works better for New York, since there are a few place-county parts
# with zero population, and 100 percent of population in the other
# place-county part.
splittown2 <- subset(
  coplace2,
  as.numeric(as.character(arealand.x)) < as.numeric(as.character(arealand.y))
)
if (interactive()) {
  View(splittown2)
}

# Subset the Bay Area places from the SUMLEV=155/160 file
# (left over from the California version of this script; kept disabled)
# BayArea <- subset(coplace2, county== "001" | county=="013" | county=="041" | county=="055" |
#                   county=="075" | county=="081" | county=="085" | county=="095" |
#                   county=="097" ) # c(1,13,41,55,75,81,85,95,97)

####################################################################
# Switch to the output folder; the disabled write.csv() calls below use
# file names relative to it.
setwd("~/Desktop/tidycensus_work/output")

#write.csv(BayArea,"Census2010_BayArea_Places.csv")
#write.csv(coplace2,"Census2010_California_Places.csv")
####################################################################