구직자를 위한 기업 트렌드 시각화 경진대회

안녕하세요. 코드 공유드립니다.

2022.01.26 22:04 2,279 조회

코드가 너무 길어져서, 공유 페이지에서 보기가 힘든 관계로 해당 페이지에서 공유합니다.


---

title: "JobMarket"

author: "January"

date: "1/16/2022"

output: html_document

---


```{R message=FALSE, warning=FALSE, fig.showtext=TRUE}

# Smoke-test output: prints the string "hi" when the chunk is evaluated.
print("hi")

```



```{r}


library(showtext)

font_add(family = "NanumGothic", regular = "/Library/Fonts/NanumSquareRoundR.ttf") 


library(data.table)

library(tidyverse)

library(ggridges)

library(ggrepel)

library(readxl)

library(scales)

library(patchwork)

library(stringi)

library(KoNLP)

library(corrplot)


showtext_auto() 



# Shared ggplot2 theme for the charts in this document: light-grey panel and
# plot backgrounds, no grid lines, no y-axis ticks, legend hidden, and every
# text element drawn with the NanumGothic family.
theme_myOwn <- theme(
  # Canvas
  panel.background = element_rect(fill = "#F5F5F5"),
  plot.background = element_rect(fill = "#F5F5F5"),
  panel.grid = element_blank(),
  axis.ticks.y = element_blank(),

  # Legend
  legend.position = "none",
  legend.background = element_blank(),

  # Title block and caption are aligned to the whole plot, not the panel
  plot.title.position = "plot",
  plot.caption.position = "plot",
  plot.title = element_text(family = "NanumGothic", face = "bold", size = 20),
  plot.subtitle = element_text(family = "NanumGothic", color = "#525252"),
  plot.caption = ggtext::element_markdown(
    family = "NanumGothic",
    size = 8,
    color = "#525252"
  ),

  # Axes
  axis.title = element_text(family = "NanumGothic", color = "#525252"),
  axis.text = element_text(family = "NanumGothic", color = "#525252")
)


library(viridis)

library(hrbrthemes)

library(ggmap)

# Google Maps credentials. "whatsup" is a placeholder: replace it with your
# own API key before running.
mykey <- "whatsup"
register_google(key = mykey)

# Base map of Seoul that the map charts later in this document draw on.
seoul <- get_map(
  location = "Seoul, South Korea",
  zoom = 12,
  maptype = "toner",
  source = "stamen"
)



```



```{r}

# dataset = read.csv('/Users/jungwonwoo/Desktop/program_file/6.Dacon/Job_care/completeList2.csv') # BLIND.

# http://www.index.go.kr/potal/stts/idxMain/selectPoSttsIdxSearch.do?idx_cd=4219&stts_cd=421901


# Set the character-type locale to Korean UTF-8; the inputs read below use
# Hangul sheet names and column names.
Sys.setlocale("LC_CTYPE", "ko_KR.UTF-8")

# Every input file lives in this one folder. Change this single path to run
# the analysis against another copy of the data.
job_care_dir <- "/Users/jungwonwoo/Desktop/program_file/6.Dacon/Job_care"
job_market_xlsx <- file.path(job_care_dir, "jobMarket.xlsx")

# Company-level table; later joined on its 종목명 column.
companies <- read.csv(file.path(job_care_dir, "companyInfo.csv"))

# The three sheets of the job-market workbook.
sheet1 <- read_excel(job_market_xlsx, sheet = "시트1")
sheet2 <- read_excel(job_market_xlsx, sheet = "시트2")
sheet3 <- read_excel(job_market_xlsx, sheet = "시트3")

# Supporting workbooks: employment counts, average wage, commute time.
employmentRate <- read_excel(file.path(job_care_dir, "employmentRate.xlsx"), sheet = "시트2")
averageWage <- read_excel(file.path(job_care_dir, "AverageWage.xlsx"), sheet = "시트1")
commute <- read_excel(file.path(job_care_dir, "commute_time.xlsx"), sheet = "시트1")

# Workplace-level table (subscriber counts, billed amounts, addresses).
df <- read.csv(file.path(job_care_dir, "test5.csv"), encoding = 'UTF-8')

# Coordinate lookup: keep the three address levels plus 위도/경도 and build a
# space-separated "시도 시군구 읍면동" key in `city` for joining.
latlng <- read_excel(file.path(job_care_dir, "longlat.xlsx"), sheet = "시트1")

latlng <- latlng %>%
  select(시도, 시군구, 읍면동, 위도, 경도) %>%
  mutate(city = paste0(시도, ' ', 시군구, ' ', 읍면동))


```



```{r}


# Draws one "value by month, one line per year" chart.
#
# data      : table with a `월도` column and the column named by `value_col`.
#             `월도` is assumed to be year-month text containing '월'
#             (the first '월' is stripped and '01' appended before parsing
#             with lubridate::ymd) -- TODO confirm against the workbook.
# value_col : name (string) of the column plotted on the y axis.
# y_max     : upper y-axis limit; the lower limit is fixed at 0.
# title     : plot title.
#
# Returns a ggplot object. Years 2020 and 2021 get their own colour group;
# every other year falls into the shared "2002 ~ 2019" group.
plot_monthly_by_year <- function(data, value_col, y_max, title) {
  data %>%
    mutate(마감년월 = str_replace(월도, '월', '') %>% paste0(., '01') %>% lubridate::ymd(.)) %>%
    mutate(just_year = as.character(lubridate::year(마감년월)),
           plt_year = case_when(just_year == "2021" ~ "2021",
                                just_year == "2020" ~ "2020",
                                TRUE ~ "2002 ~ 2019"),
           month = as.numeric(lubridate::month(마감년월)),
           month = factor(month, levels = as.character(1:12))) %>%
    # One line per calendar year (group), coloured by the three-way year bucket.
    ggplot(aes(x = month, y = .data[[value_col]], group = just_year, color = plt_year)) +
    geom_line() +
    geom_point(size = 2.0, shape = 22) +
    scale_y_continuous(labels = scales::comma, limits = c(0, y_max)) +
    scale_color_manual(values = c("grey95", "black", "darkorange1"), name = "") +
    labs(x = NULL, y = NULL,
         title = title,
         subtitle = '(단위 : 명), \'02 ~ \'21년 ',
         caption = 'Source : e-나라지표') +
    theme_myOwn +
    # Darker canvas and a visible bottom legend on top of the shared theme.
    theme(axis.text = element_text(family = 'NanumGothic', face = 'bold.italic', color = "black"),
          legend.position = 'bottom',
          legend.text = element_text(family = 'NanumGothic', color = "black"),
          legend.title = element_blank(),
          plot.background = element_rect('grey80'),
          panel.background = element_rect('grey80'),
          legend.background = element_rect('grey80'),
          legend.key = element_rect('grey80'))
}

# Employed persons (취업자수) and job seekers (구직) share the same layout and
# differ only in column, y-axis ceiling and title.
p1 <- plot_monthly_by_year(employmentRate, "취업자수", 150000, '연/월간 취업자수 인원')
p2 <- plot_monthly_by_year(employmentRate, "구직", 550000, '연/월간 구직자수 인원')

# Side-by-side layout via patchwork; ggsave() below writes the last plot shown.
p1 + p2

ggsave("/Users/jungwonwoo/Desktop/program_file/6.Dacon/Job_care/visual2/1.employeed.png", width = 25, height = 12, units = "cm", dpi = 500)


```


그럼 회사들의 수요는 어느 정도가 될까요?


```{r}


# Monthly job openings (`구인인원(월)`) from sheet1, one line per year, with
# 2020 and 2021 separated from "Previous years" by colour.
sheet1 %>%
  mutate(just_year = as.character(lubridate::year(마감년월)),
         plt_year = case_when(just_year == "2021" ~ "2021",
                              just_year == "2020" ~ "2020",
                              TRUE ~ "Previous years"),
         month = as.numeric(lubridate::month(마감년월)),
         month = factor(month, levels = c('1','2','3','4','5','6','7','8','9','10','11','12'))) %>%
  ggplot(aes(x = month, y = `구인인원(월)`, group = just_year, color = plt_year)) +
  geom_line() +
  geom_point(size = 2.0, shape = 22) +
  # Fixed reference line: annotate() draws it once. A geom_segment() whose
  # aes() holds only constants is repeated for every row of the plot data.
  annotate("segment", x = 3, xend = 3, y = 100000, yend = 300000, linetype = 'dotted', color = 'black') +
  annotate("text",x = 4.5,y = 300000, size = 2.8, label = "\'20.01 코로나 유행", family = "NanumGothic", fontface = 2) +
  annotate("text",x = 7.0,y = 50000, size = 3.5, label = "* 코로나 이후 구인 인원이 크게 축소되었지만, \n 빠르게 반등세로 돌아왔음.\n 이후 계속적인 상승세를 보이고 있음.", family = "NanumGothic", color = 'grey50', fontface = 2) +
  scale_y_continuous(labels = scales::comma, limits = c(0, 400000)) +
  # Unnamed values are assigned in the order of the colour scale's levels.
  scale_color_manual(values = c("black","darkorange1","grey95"), name = "") +
  labs(x = NULL, y = NULL,
       title = '연/월간 구인 인원',
       subtitle = '(단위 : 명), \'18 ~ \'21년 ',
       caption = 'Source : e-나라지표') +
  theme_myOwn +
  # Darker canvas and a right-hand legend on top of the shared theme.
  theme(axis.text = element_text(family = 'NanumGothic', face = 'bold.italic', color = "black"),
        legend.position = 'right',
        legend.text = element_text(family = 'NanumGothic', color = "black"),
        legend.title = element_blank(),
        plot.background = element_rect('grey80'),
        panel.background = element_rect('grey80'),
        legend.background = element_rect('grey80'),
        legend.key = element_rect('grey80'))

ggsave("/Users/jungwonwoo/Desktop/program_file/6.Dacon/Job_care/visual2/2.findingjobs.png", width = 20, height = 12, units = "cm", dpi = 500)


```



```{r}


# Trim the workplace table to the columns used later and derive pay estimates.
# Rows with fewer than 50 subscribers (가입자수) are dropped.
#   Payment_percap  : billed amount per subscriber
#   Avg_monthly_pmt : Payment_percap / 9 * 100
#                     NOTE(review): presumably inverts a 9% contribution rate
#                     to estimate monthly pay -- confirm.
#   Avg_annual_pmt  : Avg_monthly_pmt * 12
df <- df %>%
  select(-X) %>%
  filter(가입자수 >= 50) %>%
  transmute(
    사업장명,
    사업장업종코드,
    가입자수,
    당월고지금액,
    신규취득자수,
    상실가입자수,
    사업장지번상세주소,
    법정동주소광역시도코드,
    법정동주소광역시시군구코드,
    Payment_percap = 당월고지금액 / 가입자수,
    Avg_monthly_pmt = (Payment_percap / 9) * 100,
    Avg_annual_pmt = Avg_monthly_pmt * 12
  )


```


```{r}


# Per-workplace table keyed by industry group.
#   industry    : first two characters of the industry code, as a number
#   numCount    : subscriber count
#   industryKor : Korean industry-group label chosen by the numeric range of
#                 `industry`; codes outside every range are labelled 'error'
df2 <- df %>%
  select(-c(사업장명, Payment_percap, Avg_annual_pmt)) %>%
  transmute(
    industry = as.numeric(substr(사업장업종코드, 1, 2)),
    numCount = 가입자수,
    Avg_monthly_pmt,
    industryKor = case_when(
      between(industry,  1,  3) ~ '농업/임업/어업',
      between(industry,  5,  8) ~ '광업',
      between(industry, 10, 34) ~ '제조업',
      between(industry, 35, 36) ~ '전기/가스/증기/수도',
      between(industry, 37, 39) ~ '폐기물처리/환경복원업',
      between(industry, 41, 42) ~ '건설업',
      between(industry, 45, 47) ~ '도매/소매업',
      between(industry, 49, 52) ~ '운수업',
      between(industry, 55, 56) ~ '숙박및음식점업',
      between(industry, 58, 63) ~ '출판/영상/방송/정보서비스업',
      between(industry, 64, 66) ~ '금융서비스업',
      between(industry, 68, 69) ~ '부동산/임대업',
      between(industry, 70, 73) ~ '전문/고학력/기술서비스업',
      between(industry, 74, 75) ~ '사업시설관리/지원서비스업',
      between(industry, 84, 84) ~ '공공행정/국방/사회보장',
      between(industry, 85, 85) ~ '교육서비스업',
      between(industry, 86, 87) ~ '보건업/사회복지서비스업',
      between(industry, 90, 91) ~ '예술/스포츠/여가관련',
      between(industry, 94, 96) ~ '협회/단체/기타개인서비스',
      between(industry, 97, 98) ~ '가구내고용/자가생산',
      between(industry, 99, 99) ~ '국제/외국기관',
      TRUE ~ 'error'
    )
  )

  

```


```{r}


# Industry groups that have at least 10 workplaces (the 'error' bucket excluded).
targetIndustry <- df2 %>%
  filter(industryKor != "error") %>%
  count(industryKor) %>%
  filter(n >= 10) %>%
  pull(industryKor)

# Horizontal box plot of estimated annual pay (Avg_monthly_pmt * 12) per
# industry group; the finance group gets its own fill colour.
df2 %>%
  filter(industryKor != "error") %>%
  transmute(industryKor, annual_income = Avg_monthly_pmt * 12) %>%
  filter(industryKor %in% targetIndustry) %>%
  mutate(
    grp = as.factor(ifelse(industryKor == "금융서비스업", "금융업", "이외 산업"))
  ) %>%
  ggplot(aes(
    x = reorder(industryKor, annual_income),
    y = annual_income,
    group = industryKor,
    fill = grp
  )) +
  geom_boxplot(outlier.color = "grey30", outlier.alpha = 0.5, color = "grey30") +
  scale_y_continuous(labels = comma, limits = c(0, 70000000)) +
  scale_fill_manual(values = c("#E69F00", "#999999")) +
  coord_flip() +
  labs(
    x = "",
    y = "",
    title = "업종별 추산연봉",
    subtitle = "국민연금 데이터 역산으로 산출 (단위 : 원)",
    caption = "Source : data.go.kr"
  ) +
  theme(
    # Canvas
    panel.background = element_rect(color = "grey80", fill = "grey80"),
    plot.background = element_rect(color = "grey80", fill = "grey80"),
    panel.grid = element_blank(),
    plot.margin = margin(t = 1, r = 2, b = 0, l = 2, unit = "cm"),

    # Axes
    axis.title.x = element_text(family = "NanumGothic", color = "#525252"),
    axis.text.x = element_text(family = "NanumGothic", color = "#525252", size = 8),
    axis.text.y = element_text(family = "NanumGothic", color = "black", size = 9),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank(),

    # Legend (hidden)
    legend.position = "none",
    legend.background = element_rect(fill = "grey80", color = "grey80"),

    # Title block
    plot.title = element_text(family = "NanumGothic", face = "bold", size = 20),
    plot.title.position = "plot",
    plot.subtitle = element_text(family = "NanumGothic", color = "#525252", size = 10),
    plot.caption = element_text(
      size = 8,
      color = "#525252",
      face = "bold.italic",
      family = "NanumGothic"
    )
  )

ggsave(
  "/Users/jungwonwoo/Desktop/program_file/6.Dacon/Job_care/visual2/3.WagebyIndustry.png",
  width = 20,
  height = 12,
  units = "cm",
  dpi = 500
)


```


```{r}


# Polar chart of the mean subscriber count per industry group. The 'error'
# and waste-treatment groups are excluded, as are groups with fewer than 20
# workplaces. The dashed circle marks the mean of numCount over all of df2.
df2 %>%
  filter(!(industryKor %in% c("error", "폐기물처리/환경복원업"))) %>%
  group_by(industryKor) %>%
  summarise(
    meanCount = mean(numCount, na.rm = TRUE),
    n = n()
  ) %>%
  filter(n >= 20) %>%
  ggplot(aes(
    x = reorder(industryKor, meanCount),
    y = meanCount,
    fill = meanCount,
    group = 1
  )) +
  geom_segment(
    aes(xend = reorder(industryKor, meanCount), yend = 0),
    linetype = "dotted",
    color = "grey60"
  ) +
  geom_hline(
    yintercept = mean(df2$numCount),
    linetype = "dashed",
    color = "grey70",
    alpha = 0.9
  ) +
  geom_line(alpha = 0.5, linetype = "longdash") +
  geom_point(size = 4, shape = 21) +
  # Disabled annotation kept for reference:
  # annotate("text",x = 1.5, y = 300, size = 2.8, label = "평균 근무인원 228명", family = "NanumGothic", fontface = 2) +
  scale_fill_continuous(type = "viridis") +
  coord_polar(clip = "off") +
  theme_minimal() +
  labs(
    x = "",
    y = "",
    title = "업종별 평균 근무인원 추산",
    subtitle = "국민연금 가입 사업장 중 50명 이상 기업 (단위 : 명)",
    caption = "평균 근무 인원은 228명. Source : data.go.kr"
  ) +
  theme(
    # Canvas
    panel.background = element_rect(color = "grey80", fill = "grey80"),
    plot.background = element_rect(color = "grey80", fill = "grey80"),
    panel.grid = element_blank(),
    plot.margin = margin(t = 1, r = 2, b = 0, l = 2, unit = "cm"),

    # Axes: only the industry labels around the circle are shown
    axis.title.x = element_text(family = "NanumGothic", color = "#525252"),
    axis.text.x = element_text(family = "NanumGothic", color = "#525252"),
    axis.text.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank(),

    # Legend: thin colour bar under the plot
    legend.position = "bottom",
    legend.key.size = unit(0.5, "cm"),
    legend.key.height = unit(0.2, "cm"),
    legend.key.width = unit(1.0, "cm"),
    legend.title = element_blank(),
    legend.background = element_rect(fill = "grey80", color = "grey80"),

    # Title block
    plot.title = element_text(family = "NanumGothic", face = "bold", size = 20),
    plot.title.position = "panel",
    plot.subtitle = element_text(family = "NanumGothic", color = "#525252"),
    plot.caption = ggtext::element_markdown(
      family = "NanumGothic",
      size = 8,
      color = "#525252"
    ),
    plot.caption.position = "plot"
  )

ggsave(
  "/Users/jungwonwoo/Desktop/program_file/6.Dacon/Job_care/visual2/4.companySize.png",
  width = 18,
  height = 18,
  units = "cm",
  dpi = 500
)


```



```{r}


# Per-neighbourhood summary for workplaces whose province code is 11:
# total subscribers (sumCnt) and mean estimated monthly pay (meanPmt), with
# coordinates attached from `latlng`.
#
# The address is split on spaces into city / town / dong and re-joined into a
# "city town dong" key that matches latlng$city.
#
# NOTE: the coordinate names are swapped. 위도 (latitude) is stored in
# `longitude` and 경도 (longitude) in `latitude`. The map charts plot
# x = latitude, y = longitude, so the two swaps cancel out; renaming here
# alone would break them.
plt <- df %>%
  filter(법정동주소광역시도코드 %in% c(11)) %>%
  separate(사업장지번상세주소, c("city", "town", "dong"), " ") %>%
  transmute(
    geocode = as.factor(paste0(city, " ", town, " ", dong)),
    사업장업종코드,
    가입자수,
    당월고지금액,
    Payment_percap = 당월고지금액 / 가입자수,
    Avg_monthly_pmt = (Payment_percap / 9) * 100,
    Avg_annual_pmt = Avg_monthly_pmt * 12
  ) %>%
  group_by(geocode) %>%
  summarise(
    sumCnt = sum(가입자수),
    meanPmt = mean(Avg_monthly_pmt, na.rm = TRUE)
  ) %>%
  left_join(
    latlng %>%
      select(geocode = city, longitude = 위도, latitude = 경도),
    by = "geocode"
  )


```



```{r}


# Bubble map of Seoul: one point per neighbourhood in `plt`, with colour and
# size both driven by the total subscriber count (sumCnt).
# `plt` stores 경도 in `latitude` and 위도 in `longitude`, hence x = latitude.
# alpha sits inside aes(), so it is a mapped constant that goes through
# scale_alpha() rather than a literal opacity.
ggmap(seoul, darken = 0.5, extent = "device") +
  geom_point(
    data = plt,
    mapping = aes(
      x = latitude,
      y = longitude,
      color = sumCnt,
      alpha = 0.1,
      size = sumCnt
    )
  ) +
  scale_size(range = c(0.1, 20), name = "unstated", guide = "none") +
  scale_alpha(guide = "none") +
  scale_colour_continuous(labels = comma, type = "viridis") +
  labs(
    x = "",
    y = "",
    title = "어느 지역에 내 자리가 가장 많을까?",
    subtitle = "국민연금 데이터를 통해 추산한 서울권 평균 근무인원 (단위 : 명)",
    caption = "Source : data.go.kr"
  ) +
  theme(
    # Canvas
    panel.grid = element_blank(),
    axis.ticks = element_blank(),
    panel.background = element_rect(color = "grey80", fill = "grey80"),
    plot.background = element_rect(color = "grey80", fill = "grey80"),
    plot.margin = margin(t = 1, r = 2, b = 0, l = 2, unit = "cm"),

    # Legend: untitled thin colour bar under the map
    legend.position = "bottom",
    legend.background = element_rect(color = "grey80", fill = "grey80"),
    legend.title = element_blank(),
    legend.key.size = unit(0.5, "cm"),
    legend.key.height = unit(0.2, "cm"),
    legend.key.width = unit(1.2, "cm"),

    # Title block
    plot.title = element_text(family = "NanumGothic", face = "bold", size = 14),
    plot.subtitle = element_text(family = "NanumGothic", color = "#525252"),
    plot.caption = ggtext::element_markdown(
      family = "NanumGothic",
      size = 8,
      color = "#525252"
    )
  )

ggsave(
  "/Users/jungwonwoo/Desktop/program_file/6.Dacon/Job_care/visual2/5.Seoulmap.png",
  width = 18,
  height = 18,
  units = "cm",
  dpi = 500
)


# https://blog.naver.com/PostView.naver?blogId=kiakass&logNo=222449339999&from=search&redirect=Log&widgetTypeCall=true&directAccess=false


```


```{r}


# Bubble map of Seoul: one point per neighbourhood in `plt`, with colour and
# size both driven by mean estimated monthly pay divided by 10,000
# (legend unit: 만원).
# `plt` stores 경도 in `latitude` and 위도 in `longitude`, hence x = latitude.
# alpha sits inside aes(), so it is a mapped constant that goes through
# scale_alpha() rather than a literal opacity.
ggmap(seoul, darken = 0.5, extent = "device") +
  geom_point(
    data = plt,
    mapping = aes(
      x = latitude,
      y = longitude,
      color = meanPmt / 10000,
      alpha = 0.5,
      size = meanPmt / 10000
    )
  ) +
  scale_size(range = c(0.1, 6), name = "unstated", guide = "none") +
  scale_alpha(guide = "none") +
  scale_colour_continuous(labels = comma, type = "viridis") +
  labs(
    x = "",
    y = "",
    title = "어느 지역이 월급이 높을까?",
    subtitle = "국민연금 데이터를 통해 추산한 서울권 평균 월급",
    caption = "Source : data.go.kr",
    color = "(단위 : 만원)"
  ) +
  theme(
    # Canvas
    panel.grid = element_blank(),
    axis.ticks = element_blank(),
    panel.background = element_rect(color = "grey80", fill = "grey80"),
    plot.background = element_rect(color = "grey80", fill = "grey80"),
    plot.margin = margin(t = 1, r = 2, b = 0, l = 2, unit = "cm"),

    # Legend: thin colour bar under the map with a small unit title
    legend.position = "bottom",
    legend.background = element_rect(color = "grey80", fill = "grey80"),
    legend.title = element_text(size = 7, vjust = 0.9),
    legend.key.size = unit(0.5, "cm"),
    legend.key.height = unit(0.2, "cm"),
    legend.key.width = unit(1.2, "cm"),

    # Title block
    plot.title = element_text(family = "NanumGothic", face = "bold", size = 14),
    plot.subtitle = element_text(family = "NanumGothic", color = "#525252"),
    plot.caption = ggtext::element_markdown(
      family = "NanumGothic",
      size = 8,
      color = "#525252"
    )
  )

ggsave(
  "/Users/jungwonwoo/Desktop/program_file/6.Dacon/Job_care/visual2/6.Seoulmap_money.png",
  width = 18,
  height = 18,
  units = "cm",
  dpi = 500
)


# https://blog.naver.com/PostView.naver?blogId=kiakass&logNo=222449339999&from=search&redirect=Log&widgetTypeCall=true&directAccess=false


```



```{r}


# Radial bar chart of the districts with the longest average commute.
# The district (second address token) is taken from each geocode, joined to
# the commute table on 위치, de-duplicated, and cut to the top 9 rows.
# top_n() is called without a weight column, so it ranks by the last column
# of the table, which is 평균소요시간 after the select() above it.
# 영등포구 and 양천구 each get their own legend group.
plt %>%
  separate(geocode, c("city", "town", "dong"), " ") %>%
  transmute(
    geocode = as.factor(town),
    latitude,
    longitude,
    meanPmt
  ) %>%
  left_join(commute %>% rename(geocode = 위치), by = "geocode") %>%
  select(geocode, 평균소요시간) %>%
  distinct() %>%
  mutate(geocode = as.factor(geocode)) %>%
  arrange(desc(평균소요시간)) %>%
  top_n(9) %>%
  mutate(
    grp = as.factor(case_when(
      geocode == "영등포구" ~ "영등포구",
      geocode == "양천구" ~ "양천구",
      TRUE ~ "이외 지역"
    ))
  ) %>%
  ggplot(aes(
    x = reorder(geocode, 평균소요시간),
    y = 평균소요시간,
    fill = grp,
    group = grp
  )) +
  # Disabled layer kept for reference:
  # geom_segment(aes(xend = reorder(geocode,`평균소요시간`), yend = 00), linetype = 'solid', color = 'viridis') +
  geom_col(
    aes(x = reorder(geocode, 평균소요시간), y = 평균소요시간),
    width = 0.5
  ) +
  # Dotted guide from each bar's end out to the 60-minute edge of the scale.
  geom_segment(
    aes(xend = reorder(geocode, 평균소요시간), yend = 60),
    linetype = "dotted",
    color = "grey65"
  ) +
  geom_point(size = 3, shape = 21, fill = "grey") +
  geom_text(aes(label = 평균소요시간), hjust = 1.7, size = 2.2) +
  scale_y_continuous(limits = c(0, 60)) +
  scale_fill_manual(values = c("#E69F00", "#E69F00", "#999999")) +
  geom_vline(xintercept = 9, color = "grey50", linetype = "longdash") +
  coord_polar(clip = "off", theta = "y") +
  # Disabled annotation kept for reference:
  # annotate("text", x = 12.9, y = 55.5, size = 3.0, label = "영등포 기준 평균 38.2 분 \n 가장 긴 양천구는 41분 소요됩니다.", family = 'NanumGothic', fontface = 2, color = 'black') +
  theme_minimal() +
  labs(x = "", y = "", title = "평균 출퇴근 시간",
    subtitle = '가장 오래 걸리는 지역 (단위 : 분) \n\n

    "가장 중요하면서도 고려를 잘 안하는 출퇴근시간.. \n지하철에서 버려지는 시간은 무려 하루 평균 1시간 반!"',
    caption = "Source : data.go.kr") +
  theme(
    # Canvas
    panel.background = element_rect(color = "grey80", fill = "grey80"),
    plot.background = element_rect(color = "grey80", fill = "grey80"),
    panel.grid = element_blank(),
    plot.margin = margin(t = 0.5, r = 2, b = 0, l = 2, unit = "cm"),

    # Axes: only the minute labels around the circle are shown
    axis.title.x = element_text(family = "NanumGothic", color = "#525252"),
    axis.text.x = element_text(
      family = "NanumGothic",
      color = "#525252",
      size = 9,
      face = "bold"
    ),
    axis.text.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank(),

    # Legend
    legend.position = "bottom",
    legend.key.size = unit(0.5, "cm"),
    legend.key.height = unit(0.2, "cm"),
    legend.key.width = unit(0.7, "cm"),
    legend.title = element_blank(),
    legend.text = element_text(family = "NanumGothic", color = "#525252", face = "bold"),
    legend.background = element_rect(fill = "grey80", color = "grey80"),

    # Title block
    plot.title = element_text(family = "NanumGothic", face = "bold", size = 20),
    plot.subtitle = element_text(
      family = "NanumGothic",
      color = "#525252",
      margin = margin(0, 0, 20, 0)
    ),
    plot.caption = ggtext::element_markdown(
      family = "NanumGothic",
      size = 8,
      color = "#525252"
    )
  )

ggsave(
  "/Users/jungwonwoo/Desktop/program_file/6.Dacon/Job_care/visual2/7.commute.png",
  width = 18,
  height = 18,
  units = "cm",
  dpi = 500
)


```



```{r}


# Radial bar chart of the districts with the shortest average commute.
# Same preparation as the "longest" chart, but top_n(-9) keeps the 9 lowest
# rows; with no weight column it ranks by the last column, 평균소요시간.
# 중구 gets its own legend group.
#
# NOTE(review): the plot-level x orders districts by the negated commute time,
# while the bar and guide-segment layers order by the un-negated time. Both
# are kept as they were -- confirm which order is intended.
plt %>%
  separate(geocode, c("city", "town", "dong"), " ") %>%
  transmute(
    geocode = as.factor(town),
    latitude,
    longitude,
    meanPmt
  ) %>%
  left_join(commute %>% rename(geocode = 위치), by = "geocode") %>%
  select(geocode, 평균소요시간) %>%
  distinct() %>%
  mutate(geocode = as.factor(geocode)) %>%
  arrange(desc(평균소요시간)) %>%
  top_n(-9) %>%
  mutate(
    grp = as.factor(case_when(
      geocode == "중구" ~ "중구",
      TRUE ~ "이외 지역"
    ))
  ) %>%
  ggplot(aes(
    x = reorder(geocode, -평균소요시간),
    y = 평균소요시간,
    fill = grp,
    group = grp
  )) +
  # Disabled layer kept for reference:
  # geom_segment(aes(xend = reorder(geocode,`평균소요시간`), yend = 00), linetype = 'solid', color = 'viridis') +
  geom_col(
    aes(x = reorder(geocode, 평균소요시간), y = 평균소요시간),
    width = 0.5
  ) +
  # Dotted guide from each bar's end out to the 60-minute edge of the scale.
  geom_segment(
    aes(xend = reorder(geocode, 평균소요시간), yend = 60),
    linetype = "dotted",
    color = "grey65"
  ) +
  geom_point(size = 3, shape = 21, fill = "grey") +
  geom_text(aes(label = 평균소요시간), hjust = 1.7, size = 2.2) +
  scale_y_continuous(limits = c(0, 60)) +
  scale_fill_manual(values = c("#999999", "#E69F00")) +
  geom_vline(xintercept = 9, color = "grey50", linetype = "longdash") +
  coord_polar(clip = "off", theta = "y") +
  # Disabled annotation kept for reference:
  # annotate("text", x = 8.9, y = 48.5, size = 3.0, label = "영등포 기준 평균 38.2 분 \n 가장 긴 양천구는 41분 소요됩니다.", family = 'NanumGothic', fontface = 2, color = 'black') +
  theme_minimal() +
  labs(
    x = "",
    y = "",
    title = "평균 출퇴근 시간",
    subtitle = "가장 덜 걸리는 지역 (단위 : 분)",
    caption = "Source : data.go.kr"
  ) +
  theme(
    # Canvas
    panel.background = element_rect(color = "grey80", fill = "grey80"),
    plot.background = element_rect(color = "grey80", fill = "grey80"),
    panel.grid = element_blank(),
    plot.margin = margin(t = 0, r = 2, b = 0, l = 2, unit = "cm"),

    # Axes: only the minute labels around the circle are shown
    axis.title.x = element_text(family = "NanumGothic", color = "#525252"),
    axis.text.x = element_text(
      family = "NanumGothic",
      color = "#525252",
      size = 9,
      face = "bold"
    ),
    axis.text.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank(),

    # Legend
    legend.position = "bottom",
    legend.key.size = unit(0.5, "cm"),
    legend.key.height = unit(0.2, "cm"),
    legend.key.width = unit(0.7, "cm"),
    legend.title = element_blank(),
    legend.text = element_text(family = "NanumGothic", color = "#525252", face = "bold"),
    legend.background = element_rect(fill = "grey80", color = "grey80"),

    # Title block
    plot.title = element_text(family = "NanumGothic", face = "bold", size = 20),
    plot.subtitle = element_text(
      family = "NanumGothic",
      color = "#525252",
      margin = margin(0, 0, 0, 0)
    ),
    plot.caption = ggtext::element_markdown(
      family = "NanumGothic",
      size = 8,
      color = "#525252"
    )
  )

ggsave(
  "/Users/jungwonwoo/Desktop/program_file/6.Dacon/Job_care/visual2/8.commute_less.png",
  width = 18,
  height = 18,
  units = "cm",
  dpi = 500
)


```


```{r}


# Per-workplace hiring and leaving rates with an industry-group label.
# Only rows with a non-missing, six-character industry code are kept.
#   industry    : first two characters of the industry code, as a number
#   person_in   : new subscribers / subscribers
#   person_out  : lost subscribers / subscribers
#   industryKor : Korean industry-group label chosen by the numeric range of
#                 `industry`; codes outside every range are labelled 'error'
df3 = df %>%
  filter(!is.na(사업장업종코드)) %>%
  filter(nchar(사업장업종코드) == 6) %>%
  # substr() returns text; convert it so the numeric between() ranges below
  # compare numbers with numbers (same as the df2 derivation of this column).
  transmute(industry = as.numeric(substr(사업장업종코드,1,2)),
            가입자수,
            신규취득자수,
            상실가입자수,
            당월고지금액,
            Payment_percap = `당월고지금액`/`가입자수`,
            Avg_monthly_pmt = (Payment_percap / 9) * 100,
            Avg_annual_pmt = (Avg_monthly_pmt) * 12) %>%
  mutate(person_in = 신규취득자수/가입자수,
         person_out = 상실가입자수/가입자수) %>%
  mutate(industryKor = case_when( between(industry,1,3) ~ '농업/임업/어업',
                                  between(industry,5,8) ~ '광업',
                                  between(industry,10,34) ~ '제조업',
                                  between(industry,35,36) ~ '전기/가스/증기/수도',
                                  between(industry,37,39) ~ '폐기물처리/환경복원업',
                                  between(industry,41,42) ~ '건설업',
                                  between(industry,45,47) ~ '도매/소매업',
                                  between(industry,49,52) ~ '운수업',
                                  between(industry,55,56) ~ '숙박및음식점업',
                                  between(industry,58,63) ~ '출판/영상/방송/정보서비스업',

                                  between(industry,64,66) ~ '금융서비스업',
                                  between(industry,68,69) ~ '부동산/임대업',
                                  between(industry,70,73) ~ '전문/고학력/기술서비스업',
                                  between(industry,74,75) ~ '사업시설관리/지원서비스업',

                                  between(industry,84,84) ~ '공공행정/국방/사회보장',
                                  between(industry,85,85) ~ '교육서비스업',
                                  between(industry,86,87) ~ '보건업/사회복지서비스업',
                                  between(industry,90,91) ~ '예술/스포츠/여가관련',

                                  between(industry,94,96) ~ '협회/단체/기타개인서비스',
                                  between(industry,97,98) ~ '가구내고용/자가생산',
                                  between(industry,99,99) ~ '국제/외국기관',
                                  TRUE ~ 'error'))


# Diverging lollipop chart per industry group: mean hiring rate to the
# positive side and mean leaving rate (negated) to the negative side, both
# in percent. The 'error' group is excluded.
df3 %>%
  select(industryKor, person_in, person_out) %>%
  filter(industryKor != "error") %>%
  mutate(person_out = person_out * -1) %>%
  group_by(industryKor) %>%
  summarise(
    mean_in = mean(person_in) * 100,
    mean_out = mean(person_out) * 100
  ) %>%
  ggplot(aes(x = industryKor)) +
  geom_hline(yintercept = 0, linetype = "dotted") +
  # Faint full-width guide for every industry row.
  geom_segment(
    aes(x = industryKor, xend = industryKor, y = -10, yend = 15),
    linetype = "dotted",
    size = 0.5,
    alpha = 0.5,
    color = "grey40"
  ) +
  # Stems from zero to each rate.
  geom_segment(
    aes(x = industryKor, xend = industryKor, y = 0, yend = mean_in),
    linetype = "solid",
    size = 1.25,
    color = "grey40"
  ) +
  geom_segment(
    aes(x = industryKor, xend = industryKor, y = 0, yend = mean_out),
    linetype = "solid",
    size = 1.25,
    color = "grey40"
  ) +
  # Heads: hiring rate, then leaving rate.
  geom_point(aes(y = mean_in), size = 3, shape = 21, fill = "#73D055FF") +
  geom_point(aes(y = mean_out), size = 3, shape = 21, fill = "#404788FF") +
  annotate(
    "text",
    x = 3,
    y = 10,
    size = 2.8,
    label = "도소매업에서의 입사율이 높은 것은,\n아르바이트의 존재 때문일까요?",
    family = "NanumGothic"
  ) +
  scale_y_continuous(breaks = pretty_breaks()) +
  coord_flip() +
  theme_minimal() +
  labs(
    x = "",
    y = "",
    title = "산업군별 입사율 및 퇴사율",
    subtitle = "(단위 : %)",
    caption = "Source : data.go.kr",
    color = ""
  ) +
  theme(
    # Canvas
    panel.background = element_rect(color = "grey80", fill = "grey80"),
    plot.background = element_rect(color = "grey80", fill = "grey80"),
    panel.grid = element_blank(),

    # Axes
    axis.title.x = element_text(family = "NanumGothic", color = "#525252"),
    axis.text.x = element_text(family = "NanumGothic", color = "#525252", size = 9),
    axis.text.y = element_text(family = "NanumGothic", color = "#525252", face = "bold"),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank(),

    # Legend
    legend.position = "right",

    # Title block
    plot.title = element_text(family = "NanumGothic", face = "bold", size = 20),
    plot.subtitle = element_text(family = "NanumGothic", color = "#525252"),
    plot.caption = ggtext::element_markdown(
      family = "NanumGothic",
      size = 8,
      color = "#525252"
    )
  )

ggsave(
  "/Users/jungwonwoo/Desktop/program_file/6.Dacon/Job_care/visual2/9.inandout.png",
  width = 25,
  height = 15,
  units = "cm",
  dpi = 500
)


```


```{r}


# 좋은 기업의 기준?

# 1. 매출액 10억이상

# 2. 인원증가율 10% 이상.


# Lists workplaces with at least 300 subscribers and a valid six-character
# industry code whose net headcount change,
# (new subscribers - lost subscribers) / subscribers, lies between 0.1 and
# 0.2 inclusive. Company names are cleaned of the legal-form markers
# '주식회사' and '(주)'.
df %>%
  filter(가입자수 >= 300) %>%
  filter(!is.na(사업장업종코드)) %>%
  filter(nchar(사업장업종코드) == 6) %>%
  transmute(industry = substr(사업장업종코드,1,2),
            사업장명,
            가입자수,
            신규취득자수,
            상실가입자수,
            당월고지금액,
            Payment_percap = `당월고지금액`/`가입자수`,
            Avg_monthly_pmt = (Payment_percap / 9) * 100,
            Avg_annual_pmt = (Avg_monthly_pmt) * 12) %>%
  mutate(inOutRate = (신규취득자수-상실가입자수)/(가입자수)) %>%
  mutate(사업장명 = str_replace_all(사업장명,'주식회사','')) %>%
  # Match the literal token "(주)". As a regex, '[(주)]' is a character class
  # that strips every '(', '주' and ')' on its own, which corrupts names that
  # merely contain the syllable 주.
  mutate(사업장명 = str_replace_all(사업장명, fixed('(주)'), '')) %>%
  filter(between(inOutRate, 0.1, 0.2))



```


```{r}


# Reload the raw workplace table (the earlier chunks overwrote `df` with a
# filtered, trimmed version).
df = read.csv('/Users/jungwonwoo/Desktop/program_file/6.Dacon/Job_care/test5.csv', encoding = 'UTF-8')

# Colour-band edges for the histogram: 2.5M-wide steps starting at 0.
t0bins <- seq(0, 62880000, by = 2500000)

# Histogram of estimated annual pay per workplace, filled by 2.5M-wide band.
df %>%
  mutate( Payment_percap = `당월고지금액`/`가입자수`,
          Avg_monthly_pmt = (Payment_percap / 9) * 100,
          Avg_annual_pmt = (Avg_monthly_pmt) * 12) %>%
  filter(!is.na(Avg_annual_pmt)) %>%
  mutate(bins=cut(Avg_annual_pmt, breaks=t0bins)) %>%
  ggplot(aes(x = Avg_annual_pmt, group = bins, fill = bins)) +
  geom_histogram(color = 'grey', bins = 200) +
  scale_fill_viridis(discrete = TRUE) +
  # One-off callout arrow: annotate() draws it once. A geom_curve() whose
  # aes() holds only constants is repeated for every row of the data.
  annotate("curve", x = 50000000, y = 4000, xend = 59000000, yend = 1000, size = 0.25,
           arrow = arrow(length = unit(0.03, "npc")), curvature = 0.2, linetype = 'longdash') +
  annotate("text", x = 50000000, y = 4500, size = 2.8, label = "납부액상한이 있다보니, \n 6천만원이 넘어가는 부분에서의 정확한 \n 연봉계산은 어렵습니다.", family = "NanumGothic") +
  scale_x_continuous(labels = scales::comma) +
  scale_y_continuous(labels = scales::comma) +
  theme_minimal() +
  labs(x = '', y = '', title = '전체 사업체 평균 연봉',
       subtitle = '(단위 : 원)',
       caption = 'Source : data.go.kr',
       color = '') +
  theme(axis.title.x=element_text(family = 'NanumGothic',color = "#525252"),
        axis.text.x=element_text(family = 'NanumGothic',color = "#525252", size = 9),
        axis.ticks.x=element_blank(),
        axis.text.y =element_text(family = 'NanumGothic',color = "#525252", face = 'bold'),
        axis.ticks.y=element_blank(),
        panel.background = element_rect(color = 'grey80', fill = 'grey80'),
        plot.background = element_rect(color = 'grey80', fill = 'grey80'),
        panel.grid = element_blank(),
        legend.position = 'none',
        plot.title = element_text(family = 'NanumGothic',face = 'bold', size = 20),
        plot.subtitle = element_text(family = 'NanumGothic', color = "#525252", margin=margin(0,0,20,0)),
        plot.caption = ggtext::element_markdown(family = 'NanumGothic', size = 8, color = "#525252"))

ggsave("/Users/jungwonwoo/Desktop/program_file/6.Dacon/Job_care/visual2/10.wholeWage.png", width = 25, height = 15, units = "cm", dpi = 500)


```


```{r}




```


```{r}


# Hand-entered table: one score per monthly pay band.
clf <- c('199만원 이하','200~299만원', '300~399만원', '400~599만원', '600만원 이상')
scr <- c(4.1, 4.09, 4.33, 4.62, 4.6)

# Build the table column-wise so `scr` stays numeric. cbind() would first
# coerce both vectors to a character matrix, and on R versions that default
# to stringsAsFactors = TRUE a later as.numeric() would return factor codes
# instead of the scores.
df_temp <- data.frame(clf = clf, scr = scr, stringsAsFactors = FALSE)



# Axis-break helper that keeps only whole-number breaks.
#
# x : numeric values the breaks should cover (e.g. the axis limits).
# n : number of breaks requested from pretty(); defaults to 5.
#
# Returns the elements of pretty(x, n) whose remainder modulo 1 is smaller
# than the square root of machine epsilon.
int_breaks <- function(x, n = 5) {
  candidates <- pretty(x, n)
  tolerance <- .Machine$double.eps ^ 0.5
  is_whole <- abs(candidates %% 1) < tolerance
  candidates[is_whole]
}


# Horizontal dot chart of the score per pay band in `df_temp`, with each
# score printed inside its dot and guide lines running to both ends (3 and 5)
# of the score axis.
df_temp %>%
  mutate(scr = as.numeric(scr)) %>%
  ggplot(aes(x = clf, y = scr)) +
  geom_segment(aes(x = clf, xend = clf, y = 3, yend = scr),linetype = 'dotted', color = 'brown') +
  geom_segment(aes(x = clf, xend = clf, y = 5, yend = scr),linetype = 'dashed', color = 'brown', alpha = 0.6) +
  geom_point(size = 15, shape = 21, fill = 'cornsilk2') +
  geom_text(aes(label = as.character(scr)), vjust = 0.5, hjust = 0.5, size = 3, color = 'black') +
  # One-off callout arrow: annotate() draws it once. A geom_curve() whose
  # aes() holds only constants is repeated for every row of the data.
  annotate("curve", x = 3, y = 3.7, xend = 3.9, yend = 4.5, size = 0.25,
           arrow = arrow(length = unit(0.03, "npc")), curvature = -0.2, linetype = 'solid') +
  annotate("text", x = 2.7, y = 3.5, size = 3.0, label = "급여가 높을 수록 직무 만족도는 소폭 상승해요.", family = 'NanumGothic', fontface = 2, color = 'black') +
  # Whole-number breaks only on the 3-5 score axis.
  scale_y_continuous(limits = c(3,5), breaks = int_breaks) +
  coord_flip() +
  theme_minimal() +
  labs(x = '', y = '', title = '급여별 근무 만족도 결과',
       subtitle = '(단위 : 점)',
       caption = 'Source : Gallup Korea',
       color = '') +
  theme(axis.title.x=element_text(family = 'NanumGothic',color = "#525252"),
        axis.text.x=element_text(family = 'NanumGothic',color = "#525252", size = 9),
        axis.ticks.x=element_blank(),
        axis.text.y =element_text(family = 'NanumGothic',color = "#525252", face = 'bold'),
        axis.ticks.y=element_blank(),
        panel.background = element_rect(color = 'grey80', fill = 'grey80'),
        plot.background = element_rect(color = 'grey80', fill = 'grey80'),
        panel.grid = element_blank(),
        legend.position = 'none',
        plot.title = element_text(family = 'NanumGothic',face = 'bold', size = 20),
        plot.subtitle = element_text(family = 'NanumGothic', color = "#525252", margin=margin(0,0,20,0)),
        plot.caption = ggtext::element_markdown(family = 'NanumGothic', size = 8, color = "#525252"))

ggsave("/Users/jungwonwoo/Desktop/program_file/6.Dacon/Job_care/visual2/12.Lasttwo.png", width = 25, height = 15, units = "cm", dpi = 500)



```


```{r}





# Scatter plot of log revenue against log estimated annual pay per company,
# with point size and colour mapped to the net head-count change rate. Only
# companies whose 매출액 is at least 100000 are kept. Saved to PNG afterwards.
df %>%
  # Normalise workplace names so they can be matched against 종목명.
  mutate(사업장명 = str_replace_all(사업장명, '주식회사', '')) %>%
  # Remove the literal "(주)" marker only. The earlier pattern '[(주)]' was a
  # regex character class, so it also deleted every single '(' , ')' and '주'
  # occurring inside the name itself and corrupted the join key.
  mutate(사업장명 = str_replace_all(사업장명, fixed('(주)'), '')) %>%
  left_join(companies %>%
              filter(매출액 >= 100000) %>%
              rename(사업장명 = 종목명), by = '사업장명') %>%
  filter(!is.na(기준)) %>%
  # Rows without a company match carry NA revenue and are dropped here too.
  filter(매출액 >= 100000) %>%
  mutate(Payment_percap = `당월고지금액` / `가입자수`,
         # NOTE(review): "/ 9 * 100" presumably inverts a 9% contribution rate
         # to estimate monthly pay from the billed amount -- confirm.
         Avg_monthly_pmt = (Payment_percap / 9) * 100,
         Avg_annual_pmt = (Avg_monthly_pmt) * 12,
         # Net change: (new members - departed members) / members, in percent.
         inOutRate = (신규취득자수 - 상실가입자수) / (가입자수) * 100) %>%
  filter(between(inOutRate, 0, 100)) %>%
  # Both money columns go onto a log1p scale for plotting.
  transmute(사업장명, Avg_annual_pmt = log1p(Avg_annual_pmt), inOutRate, 매출액 = log1p(매출액)) %>%
  arrange(desc(Avg_annual_pmt)) %>%
  filter(inOutRate != 0) %>%
  ggplot(aes(x = 매출액, y = Avg_annual_pmt, color = inOutRate, size = inOutRate)) +
  geom_smooth(color = 'black', alpha = 0.5, method = 'lm') +
  geom_point(alpha = 0.7) +
  geom_text(aes(label = 사업장명), vjust = 3.0, size = 2) +
  scale_size(range = c(0, 10), guide = 'none') +
  scale_x_continuous(labels = scales::comma) +
  # Scale limits discard observations outside 16.75-18, which also removes
  # them from the linear fit above.
  scale_y_continuous(labels = scales::comma, limits = c(16.75, 18)) +
  annotate("text", x = 16.0, y = 17.3, size = 2.8, color = 'grey20', label = "인원 증가율이 클 수록 원이 커집니다.", family = "NanumGothic", fontface = 2) +
  theme_minimal() +
  labs(x = '매출액', y = '연봉', title = '매출액과 연봉, 인원증가율의 상관관계',
       subtitle = '(단위 : Log/원)',
       caption = 'Source : data.go.kr',
       color = '인원 증가율 (%)') +
  theme(axis.title.x = element_text(family = 'NanumGothic', color = "#525252"),
        axis.text.x = element_text(family = 'NanumGothic', color = "#525252", size = 9),
        axis.ticks.x = element_blank(),
        axis.text.y = element_text(family = 'NanumGothic', color = "#525252", face = 'bold'),
        axis.ticks.y = element_blank(),
        panel.background = element_rect(color = 'grey80', fill = 'grey80'),
        plot.background = element_rect(color = 'grey80', fill = 'grey80'),
        panel.grid = element_blank(),
        legend.position = 'bottom',
        legend.title = element_text(family = 'NanumGothic', color = "#525252", face = 'bold', vjust = 0.75),
        legend.box.margin = margin(0, 1, 0, 1),
        plot.title = element_text(family = 'NanumGothic', face = 'bold', size = 20),
        plot.subtitle = element_text(family = 'NanumGothic', color = "#525252", margin = margin(0, 0, 20, 0)),
        plot.caption = ggtext::element_markdown(family = 'NanumGothic', size = 8, color = "#525252"))

# No plot argument is given, so ggsave() writes the most recently built plot.
ggsave("/Users/jungwonwoo/Desktop/program_file/6.Dacon/Job_care/visual2/11.Lastone.png", width = 25, height = 17, units = "cm", dpi = 500)




```


```{r}



# https://kosis.kr/statisticsList/statisticsListIndex.do?parentId=P1.1&vwcd=MT_ZTITLE&menuId=M_01_01#content-group # SW engineer wages by job type
# The survey collects the wages actually paid to software engineers working at
# companies in the software business, so that they can be applied as the
# average wage of engineers assigned to software projects, and to track wage
# trends among software engineering personnel.

sw_wage <- read.csv('/Users/jungwonwoo/Desktop/addition/SW_AverageWage_20220126004628.csv', fileEncoding = "euc-kr")

# Derive the plotted wage once and reuse it for both the mean and the chart.
# The plot subtitle gives the unit as monthly, in 10,000 KRW.
# NOTE(review): 20.9 looks like working days per month and 1.05 an uplift
# factor -- confirm against the data source.
# The digits argument belongs inside round(); with the closing parenthesis one
# token early, the stray 0 became an extra mutate() argument and added a junk
# column named `0`.
sw_wage_monthly <- sw_wage %>%
  rename(Wage = `X2019`) %>%
  mutate(grp = as.factor(ifelse(직무별 == '데이터분석가', 1, 0)),  # 1 = highlighted job
         Wage = round((Wage * 20.9) * 1.05 / 10000, 0))

# Mean of the converted wage over all job types.
meanWage <- sw_wage_monthly %>%
  pull(Wage) %>%
  mean()

# One square marker per job type ordered by wage, with the highlighted job
# (grp == 1) in a different fill colour.
sw_wage_monthly %>%
  ggplot(aes(x = reorder(직무별, Wage), y = Wage, group = grp, fill = grp)) +
  # Dashed guide lines from each marker towards both ends of the wage axis.
  geom_segment(aes(xend = reorder(직무별, Wage), yend = 350), linetype = 'dashed', color = 'grey75', alpha = 0.9) +
  geom_segment(aes(xend = reorder(직무별, Wage), yend = 1300), linetype = 'dashed', color = 'grey75', alpha = 0.9) +
  geom_point(size = 7, shape = 22, color = 'grey80') +
  geom_label_repel(aes(label = 직무별), box.padding = 0.5, max.overlaps = 28, size = 3, min.segment.length = 0.5, segment.linetype = 5, segment.curvature = -1e-20, segment.alpha = 0.2) +
  # Wage value printed inside each marker.
  geom_text(aes(label = Wage), size = 2, vjust = 0.5, hjust = 0.5, color = 'white') +
  scale_fill_manual(values = c("#999999", "#E69F00")) +
  scale_y_continuous(labels = scales::comma, limits = c(250, 1300)) +
  theme_minimal() +
  labs(x = '', y = '', title = 'SW관련 산업종사자 평균임금 ',
       subtitle = '(단위 : 월, 만원)',
       caption = 'Source : data.go.kr') +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        panel.background = element_rect(color = 'grey80', fill = 'grey80'),
        plot.background = element_rect(color = 'grey80', fill = 'grey80'),
        panel.grid = element_blank(),
        legend.position = 'none',
        plot.title = element_text(family = 'NanumGothic', face = 'bold', size = 20),
        plot.subtitle = element_text(family = 'NanumGothic', color = "#525252"),
        plot.caption = ggtext::element_markdown(family = 'NanumGothic', size = 8, color = "#525252"))

# No plot argument is given, so ggsave() writes the most recently built plot.
ggsave("/Users/jungwonwoo/Desktop/program_file/6.Dacon/Job_care/visual2/13.SW_Wage.png", width = 25, height = 17, units = "cm", dpi = 500)


```



```{r}



# https://gsis.kwdi.re.kr/statHtml/statHtml.do?orgId=338&tblId=DT_1XD7002 # youth job-change statistics
# https://kosis.kr/statisticsList/statisticsListIndex.do?parentId=D.1&vwcd=MT_ZTITLE&menuId=M_01_01#content-group # tenure at first job

jobFlow <- read.csv('/Users/jungwonwoo/Desktop/addition/Curs_Job_inflow.csv', fileEncoding = "euc-kr", header = TRUE)

# Dodged horizontal bar chart of `total` per category (clc), one bar per
# survey year, followed by a PNG export.
jobFlow %>%
  # Transpose so every original column becomes a row; the former column
  # names end up in `time`.
  t() %>%
  as.data.frame() %>%
  rownames_to_column(var = "time") %>%
  # Discard the first three rows of the transposed table.
  .[-(1:3), ] %>%
  # NOTE(review): meaning of V1..V4 is taken from these names only -- confirm
  # against the CSV layout.
  rename(clc = V1, total = V2, hs_grad = V3, col_grad = V4) %>%
  mutate(
    # Map the header text of each survey wave to a date string; anything
    # unexpected becomes 'error', which ymd() cannot parse.
    time = case_when(
      str_detect(time, 'X2019..05') ~ '20190501',
      str_detect(time, 'X2020..05') ~ '20200501',
      str_detect(time, 'X2021..05') ~ '20210501',
      TRUE ~ 'error'
    ),
    # Keep only the year, as a factor, for the fill legend.
    time = as.factor(lubridate::year(lubridate::ymd(time)))
  ) %>%
  select(time, clc, total) %>%
  mutate(total = as.numeric(total)) %>%
  # Leave out the two categories that are not wanted in the chart.
  filter(clc != '졸업/중퇴 후 취업 유경험자' & clc != '그 외') %>%
  ggplot(aes(x = reorder(clc, -total), y = total, fill = time)) +
  geom_col(position = position_dodge(), alpha = 0.4, width = 0.4) +
  # Shaded boxes and a note at fixed positions.
  annotate("rect", xmin = 1.5, xmax = 2.5, ymin = 600, ymax = 900, alpha = .2) +
  annotate("rect", xmin = 0.5, xmax = 1.5, ymin = 1000, ymax = 1300, alpha = .2) +
  annotate("text", x = 3.5, y = 1000, size = 3.0, label = "여전히 대부분의 취업 경로는 \n 공개시험 및 인터넷을 통한 구직으로 보이지만,\n추천에 의한 입사 또한 적지 않습니다.", family = 'NanumGothic', fontface = 2, color = 'grey40') +
  scale_fill_brewer(palette = "BuPu") +
  coord_flip() +
  theme_minimal() +
  labs(
    title = '취업 경로 조사',
    subtitle = '(단위 : 명)',
    caption = 'Source : data.go.kr',
    x = '',
    y = '',
    color = '인원 증가율 (%)'
  ) +
  theme(
    plot.title = element_text(family = 'NanumGothic', face = 'bold', size = 20),
    plot.subtitle = element_text(family = 'NanumGothic', color = "#525252"),
    plot.caption = ggtext::element_markdown(family = 'NanumGothic', size = 8, color = "#525252"),
    axis.title.x = element_text(family = 'NanumGothic', color = "#525252"),
    axis.text.x = element_text(family = 'NanumGothic', color = "#525252", size = 9),
    axis.text.y = element_text(family = 'NanumGothic', color = "#525252", face = 'bold'),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank(),
    panel.background = element_rect(color = 'grey80', fill = 'grey80'),
    plot.background = element_rect(color = 'grey80', fill = 'grey80'),
    panel.grid = element_blank(),
    legend.position = 'bottom',
    legend.title = element_blank()
  )

# No plot argument is given, so ggsave() writes the most recently built plot.
ggsave("/Users/jungwonwoo/Desktop/program_file/6.Dacon/Job_care/visual2/15.InRoute.png", width = 25, height = 17, units = "cm", dpi = 500)


  


```