# ---- Small simulated logistic-regression example (100 observations) ----
# Fix the RNG seed so every draw below is reproducible.
set.seed(666)

# Two continuous predictors drawn from a standard normal.
x1 <- rnorm(n = 100)
x2 <- rnorm(n = 100)

# A third predictor taking the values 1, 2, 3 with equal weight.
x3 <- sample(
  x = c(1, 2, 3),
  size = 100,
  replace = TRUE,
  prob = rep(1 / 3, times = 3)
)

# Linear predictor: intercept 0.01 plus weighted predictors.
z <- 0.01 + 0.5 * x1 + 1.2 * x2 + 0.75 * x3

# Inverse-logit turns the linear predictor into a success probability.
pr <- 1 / (1 + exp(-z))

# Bernoulli response: one trial per observation with probability `pr`.
y <- rbinom(n = 100, size = 1, prob = pr)

# Show the probabilities next to the realised outcomes.
data.frame(pr = pr, y = y)

# Gather response and predictors for modelling.
df <- data.frame(y = y, x1 = x1, x2 = x2, x3 = x3)

# Logistic fit with x3 entered as a numeric term.
glm(formula = y ~ x1 + x2 + x3, family = "binomial", data = df)

# Same fit with x3 entered as a factor; print the coefficient table.
summary(
  glm(formula = y ~ x1 + x2 + as.factor(x3), family = "binomial", data = df)
)
# ---- Larger simulation: two groups (a, b) of 100,000 observations each ----

# Numeric predictor: group a is centred at 290, group b at 300 (both sd 15).
x1_a <- rnorm(100000, mean = 290, sd = 15)
x1_b <- rnorm(100000, mean = 300, sd = 15)
x1 <- c(x1_a, x1_b)

# Categorical predictor with 4 levels; the level weights differ by group.
x2_a <- sample(1:4, size = 100000, prob = c(.3, .5, .1, .1), replace = TRUE)
x2_b <- sample(1:4, size = 100000, prob = c(.1, .1, .3, .5), replace = TRUE)
x2 <- c(x2_a, x2_b)

# Binary response, drawn separately for each group.
y1 <- sample(0:1, size = 100000, prob = c(.8, .2), replace = TRUE)
# NOTE(review): the weights c(.6, .3) do not sum to 1. sample() treats `prob`
# as weights, so the effective probabilities are 2/3 and 1/3. Confirm whether
# c(.6, .4) or c(.7, .3) was intended. Left unchanged to keep the draws as-is.
y2 <- sample(0:1, size = 100000, prob = c(.6, .3), replace = TRUE)
# Tabulate y2 only after it exists; the original called table(y2) one line
# earlier, which fails in a fresh session with "object 'y2' not found".
table(y2)

# Stack both groups into the full response and check its class balance.
y <- c(y1, y2)
table(y)

# Assemble the modelling data; the response is stored as a factor.
dat <- data.frame(x1 = x1, x2 = x2, y = as.factor(y))

# Logistic regression of y on x1 and x2 (x2 entered as a factor).
mylogit <- glm(y ~ x1 + as.factor(x2), data = dat, family = binomial())
summary(mylogit)
# Re-draw the binary response for both groups of 100,000 observations.
y1 <- sample(0:1, size = 100000, prob = c(.8, .2), replace = TRUE)
# NOTE(review): the weights c(.6, .3) do not sum to 1. sample() treats `prob`
# as weights, so the effective probabilities are 2/3 and 1/3. Confirm whether
# c(.6, .4) or c(.7, .3) was intended. Left unchanged to keep the draws as-is.
y2 <- sample(0:1, size = 100000, prob = c(.6, .3), replace = TRUE)
# Tabulate the freshly drawn y2; the original called table(y2) before this
# assignment, so it showed a stale y2 or failed when none existed.
table(y2)

# Stack both groups into the full response vector.
y <- c(y1, y2)