# Input file is all programmers/CS/EE people in California, PUMS 2000 5%
# sample.

# Read and prepare the PUMS extract.
#
# Reads "PrgsEngs05.dat" (whitespace-delimited, with a header row) from the
# current working directory, filters it, adds indicator columns, and
# publishes two data frames via `<<-` (the global environment when this
# file is sourced at top level):
#   pums   - filtered rows plus the indicator columns below
#   pums32 - the subset of `pums` with Age >= 32
#
# Columns read from the file: WksWrk, WgInc, Born, Age, YrEntUS, Educ.
# Logical columns added: Ent17, China, India, MS, PhD, k150, k200.
#
# Takes no arguments; called for its side effect on `pums` and `pums32`.
datapreppums <- function() {
  dat <- read.table("PrgsEngs05.dat", header = TRUE)

  # Take only those who worked a full year, full time: at least 48 weeks
  # worked and wage income above $30,000.
  dat <- dat[dat$WksWrk >= 48 & dat$WgInc > 30000, ]

  # Dummy for having entered the US at age 17 or older. Age at entry is
  # the census-year age minus the years elapsed since entry.
  # NOTE(review): birthplace code 6 is excluded outright; presumably a
  # US-born code - confirm against the PUMS codebook.
  age_at_entry <- dat$Age - (2000 - dat$YrEntUS)
  dat$Ent17 <- (dat$Born != 6) & (age_at_entry >= 17)

  # Adult-entry immigrants by birthplace code (207 -> China, 210 -> India).
  dat$China <- dat$Born == 207 & dat$Ent17
  dat$India <- dat$Born == 210 & dat$Ent17

  # Dummies for MS, PhD (education codes 14 and 16).
  dat$MS <- dat$Educ == 14
  dat$PhD <- dat$Educ == 16

  # Dummies for salary above $150K and above $200K.
  dat$k150 <- dat$WgInc > 150000
  dat$k200 <- dat$WgInc > 200000

  # Publish the results under the names the rest of the file reads.
  pums <<- dat
  # Give enough time to get a green card, at least for EB-1, EB-2.
  pums32 <<- dat[dat$Age >= 32, ]
}

# Form a confidence interval.
#
# x: numeric vector.
# Returns a length-2 numeric vector: the sample mean of `x`, followed by
# the half-width 1.96 * sd(x) / sqrt(length(x)) of a normal-approximation
# (roughly 95%) interval, so the interval is element 1 +/- element 2.
xbpm <- function(x) {
  c(mean(x), 1.96 * sd(x) / sqrt(length(x)))
}

# Print exploratory summaries of the prepared data.
#
# Reads the `pums` and `pums32` data frames created by datapreppums(), so
# that function must have been run first. Prints, in order: two
# proportions, a linear-model summary, and a logistic-model summary.
explorepums <- function() {
  # Share of adult-entry immigrants (Ent17) born in China, then in India.
  # NOTE(review): these ratios are not restricted to PhD holders.
  print(sum(pums$China) / sum(pums$Ent17))
  print(sum(pums$India) / sum(pums$Ent17))

  # Regress wage against age, education, and immigrant/China/India dummies.
  # Passing `data =` keeps coefficient labels free of the `pums32$` prefix.
  print(summary(
    lm(WgInc ~ Age + MS + PhD + Ent17 + China + India, data = pums32)
  ))

  # Logit regression of wage > $150K against the same predictors.
  print(summary(
    glm(k150 ~ Age + MS + PhD + Ent17 + China + India,
        family = binomial, data = pums32)
  ))
}