# Load the built-in airquality data set and take a first look at it.
data(list = "airquality")
str(object = airquality)
# 1. Inspect the data structure and the variable names
print(dim(airquality)) # 153 rows, 6 columns
str(object = airquality)
print(airquality) # the full data frame
print(head(airquality, n = 6L)) # first six rows
print(tail(airquality, n = 6L)) # last six rows
print(names(airquality)) # variable names
# 2. Explore missing values
# Logical matrix with the same shape as the data: TRUE marks a missing cell.
na_mask <- is.na(airquality)
print(na_mask) # many missing values are visible in Ozone
print(any(na_mask))
print(sum(na_mask)) # 44 missing cells in total
print(sum(is.na(airquality[["Ozone"]]))) # 37 missing values in Ozone
print(sum(is.na(airquality[["Solar.R"]]))) # 7 missing values in Solar.R
# 3. Descriptive statistics for the continuous variables
# (The original var() line for Ozone ended in a stray non-ASCII character,
# which is a syntax error and kept the whole file from being parsed.)
continuous_vars <- c("Ozone", "Solar.R", "Wind", "Temp")

# Mean, max, min, median, variance and standard deviation of a numeric
# vector. Missing values are dropped: Ozone and Solar.R contain NAs, while
# for Wind and Temp (no NAs) na.rm = TRUE changes nothing.
describe_numeric <- function(x) {
  c(
    mean = mean(x, na.rm = TRUE),
    max = max(x, na.rm = TRUE),
    min = min(x, na.rm = TRUE),
    median = median(x, na.rm = TRUE),
    var = var(x, na.rm = TRUE),
    sd = sd(x, na.rm = TRUE)
  )
}

# One row per statistic, one column per variable.
continuous_stats <- vapply(
  airquality[continuous_vars],
  describe_numeric,
  numeric(6)
)
continuous_stats

# Five-number summaries (plus NA counts), first for the continuous
# variables only and then for the whole data frame.
summary(airquality[continuous_vars])
summary(airquality)
# Graphs: one histogram and one boxplot per continuous variable
plot_vars <- c("Ozone", "Solar.R", "Wind", "Temp")

# Histograms. Title and x label are spelled out so they read exactly as if
# hist() had been called on airquality$<name> directly.
for (var_name in plot_vars) {
  var_label <- paste0("airquality$", var_name)
  hist(
    airquality[[var_name]],
    main = paste("Histogram of", var_label),
    xlab = var_label
  )
}

# Boxplots, in the same variable order.
for (var_name in plot_vars) {
  boxplot(airquality[[var_name]])
}
# 4. Scatter plots and correlation coefficients: Ozone vs Solar.R, Wind, Temp
# The original called scatterplot() from the car package, which this script
# never loads, so those calls fail in a fresh session. The same picture
# (points plus a least-squares line) is drawn here with base graphics.
ozone_partners <- c("Solar.R", "Wind", "Temp")

for (partner in ozone_partners) {
  plot(
    airquality$Ozone, airquality[[partner]],
    xlab = "Ozone", ylab = partner, cex = 2
  )
  # lm() drops rows with a missing value in either variable by default.
  abline(lm(airquality[[partner]] ~ airquality$Ozone), lwd = 2)
}

# Pearson correlation of Ozone with each variable, using only the rows
# where both values are present.
ozone_cor <- vapply(
  ozone_partners,
  function(partner) {
    cor(airquality$Ozone, airquality[[partner]], use = "complete.obs")
  },
  numeric(1)
)
ozone_cor
# 5. Frequency tables and bar charts for the categorical variables
#    (Month and Day are treated as categorical)
month_freq <- table(airquality[["Month"]])
day_freq <- table(airquality[["Day"]])
month_by_day <- table(airquality[["Month"]], airquality[["Day"]])
day_by_month <- table(airquality[["Day"]], airquality[["Month"]])

print(month_freq)
print(day_freq)
print(month_by_day)
print(day_by_month)

# Bar charts of the same counts
barplot(month_freq)
barplot(day_freq)
barplot(day_by_month)
# 6. Which variable is most strongly associated with the ozone concentration?
airquality[1:2, ]
pairs(airquality[, 1:4], panel = panel.smooth)

# Correlation of Ozone with each of the other three continuous variables,
# each computed on the rows where that pair is complete. Comparing all three
# is what backs the "largest" claim below; the original computed only Temp.
ozone_assoc <- cor(airquality[, 1:4], use = "pairwise.complete.obs")["Ozone", -1]
ozone_assoc
names(which.max(abs(ozone_assoc)))

# Temp against Ozone with a least-squares line. Base graphics replace the
# original scatterplot() call, which needs the car package that this script
# never loads.
plot(Temp ~ Ozone, data = airquality, cex = 2)
abline(lm(Temp ~ Ozone, data = airquality), lwd = 2)
# > Temp has the largest correlation with Ozone (about 0.7), so temperature
# > appears to be the variable most closely associated with ozone.
# 7. How does the ozone concentration differ by month or by day of month?
# The original drew barplot(table(Ozone, Month)); the bar heights there are
# counts of non-missing Ozone observations per group, not ozone levels.
# Grouped boxplots and group means show the concentration itself.
boxplot(Ozone ~ Month, data = airquality, xlab = "Month", ylab = "Ozone")
ozone_by_month <- tapply(
  airquality$Ozone, airquality$Month, mean,
  na.rm = TRUE
)
ozone_by_month

boxplot(Ozone ~ Day, data = airquality, xlab = "Day", ylab = "Ozone")
ozone_by_day <- tapply(
  airquality$Ozone, airquality$Day, mean,
  na.rm = TRUE
)
ozone_by_day
barplot(ozone_by_day, xlab = "Day", ylab = "Mean Ozone")
# NOTE(review): the original conclusions (ozone lower in June and slightly
# higher in September; higher in the middle of the month) were read off
# observation counts. Re-derive them from the means and boxplots above.
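# --- Blog page footer captured together with the script; not R code ---
# Other posts in the 'R' category:
# "if conditional statements in R" (0) | 2019.10.03
# Comments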