# R: 複数選択回答（multiple-choice answers）の処理方法, stringr, ddply ver. 4

Last Updated on May 18, 2020 by shibatau

### II.サンプルの作成

参考: R multiple choice questionnaire data to ggplot (https://stackoverflow.com/questions/39855203/r-multiple-choice-questionnaire-data-to-ggplot)

サンプルデータを作成するスクリプトと、作成されたデータフレーム df は次のとおりです。

# Script: build a sample data set of 100 participants answering one
# multiple-choice question with three options (a, b, c).
# Each Q1_* column holds the option letter when it was selected and "" when
# it was not; the options are drawn with probabilities 1/2, 2/3 and 1/3.
# NOTE: sample() is random, so the data differ on every run; call set.seed()
# beforehand if reproducible output is needed.
df <- data.frame(
  Participant = seq_len(100),
  Q1_1 = sample(c("a", ""), 100, replace = TRUE, prob = c(1 / 2, 1 / 2)),
  Q1_2 = sample(c("b", ""), 100, replace = TRUE, prob = c(2 / 3, 1 / 3)),
  Q1_3 = sample(c("c", ""), 100, replace = TRUE, prob = c(1 / 3, 2 / 3))
)
# Concatenate the selected options into one answer string, e.g. "ab" or "abc"
# ("" when nothing was selected).
df$answer <- paste0(df$Q1_1, df$Q1_2, df$Q1_3)

# dfを実行（sample() は乱数を使うため、実行のたびに結果は変わります）
Participant Q1_1 Q1_2 Q1_3 answer
1             1    a    b    c    abc
2             2    a    b          ab
3             3
4             4         b    c     bc
5             5    a    b          ab
6             6    a    b          ab
7             7         b           b
8             8         b    c     bc
9             9    a    b          ab
10           10    a    b    c    abc
11           11    a                a
12           12    a    b          ab
13           13    a    b          ab
14           14         b           b
15           15              c      c
16           16    a    b    c    abc
17           17    a                a
18           18    a    b          ab
19           19         b           b
20           20         b    c     bc
21           21         b           b
22           22
23           23              c      c
24           24    a                a
25           25
26           26    a    b          ab
27           27    a    b    c    abc
28           28    a                a
29           29         b           b
30           30              c      c
31           31    a    b    c    abc
32           32    a                a
33           33         b           b
34           34         b           b
35           35         b           b
36           36    a    b    c    abc
37           37    a                a
38           38    a    b    c    abc
39           39         b           b
40           40         b    c     bc
41           41    a         c     ac
42           42    a                a
43           43
44           44         b    c     bc
45           45    a    b          ab
46           46         b           b
47           47         b    c     bc
48           48         b    c     bc
49           49         b           b
50           50    a    b          ab
51           51    a         c     ac
52           52    a    b    c    abc
53           53    a                a
54           54    a                a
55           55    a                a
56           56
57           57         b    c     bc
58           58         b    c     bc
59           59    a                a
60           60    a    b          ab
61           61    a    b          ab
62           62              c      c
63           63    a    b          ab
64           64         b           b
65           65
66           66    a    b          ab
67           67    a    b          ab
68           68         b           b
69           69
70           70    a    b    c    abc
71           71    a                a
72           72         b    c     bc
73           73    a    b          ab
74           74         b           b
75           75    a    b          ab
76           76         b    c     bc
77           77    a    b          ab
78           78
79           79         b           b
80           80         b           b
81           81    a    b          ab
82           82    a    b    c    abc
83           83    a         c     ac
84           84         b           b
85           85    a    b    c    abc
86           86    a    b    c    abc
87           87    a         c     ac
88           88         b    c     bc
89           89    a    b          ab
90           90    a    b          ab
91           91    a                a
92           92    a                a
93           93
94           94         b           b
95           95    a    b    c    abc
96           96         b    c     bc
97           97         b    c     bc
98           98    a    b    c    abc
99           99         b           b
100         100         b           b

### III.選択肢の出現回数

ライブラリstringrのstr_countを用いて選択肢の文字列を数えます。

# Count how often each option (a, b, c) appears across all answers.
# str_count() returns the number of matches in each answer string; summing
# those per-row counts gives the total number of times the option was chosen.
# The result is a one-row data frame with columns freq_a, freq_b and freq_c.
freq <- df %>%
  summarise(
    freq_a = sum(str_count(answer, "a")),
    freq_b = sum(str_count(answer, "b")),
    freq_c = sum(str_count(answer, "c"))
  )

freq_a freq_b freq_c
    47     66     44

### IV.回答の出現回数

ライブラリplyrのddply()を用いて回答の文字列を数えます。

# Count how many participants gave each distinct answer combination:
# split df by the answer column and record the group size as `freq`.
freq2 <- ddply(
  .data = df,
  .variables = "answer",
  .fun = summarize,
  freq = length(answer)
)

answer freq
         14
     a    8
    ab   17
   abc   14
    ac    8
     b   17
    bc   18
     c    4

### V.全スクリプト

# Full script: tabulate and plot multiple-choice questionnaire answers.
# Based on:
# https://stackoverflow.com/questions/39855203/r-multiple-choice-questionnaire-data-to-ggplot

# Install once if needed (ddply() is provided by the plyr package).
# install.packages("plyr")

# Load libraries ----
# plyr is attached before the tidyverse so that dplyr's summarise() masks
# plyr's version rather than the other way round.
library(plyr)
library(tidyverse)
library(stringr) # str_count(); already attached by tidyverse, kept explicit

# Create the data ----
# 100 participants; each Q1_* column holds the option letter when selected
# and "" otherwise. sample() is random, so results differ on every run.
df <- data.frame(
  Participant = seq_len(100),
  Q1_1 = sample(c("a", ""), 100, replace = TRUE, prob = c(1 / 2, 1 / 2)),
  Q1_2 = sample(c("b", ""), 100, replace = TRUE, prob = c(2 / 3, 1 / 3)),
  Q1_3 = sample(c("c", ""), 100, replace = TRUE, prob = c(1 / 3, 2 / 3))
)
# Concatenate the selected options into one answer string such as "ab".
df$answer <- paste0(df$Q1_1, df$Q1_2, df$Q1_3)

# Count how often each option appears ----
freq1 <- df %>%
  summarise(
    freq_a = sum(str_count(answer, "a")),
    freq_b = sum(str_count(answer, "b")),
    freq_c = sum(str_count(answer, "c"))
  )
str(freq1)

# Reshape from wide to long ----
freq1_long <- freq1 %>%
  pivot_longer(
    freq_a:freq_c,
    names_to = "selection", # collect the column names into `selection`
    names_prefix = "freq_", # strip the "freq_" prefix from those names
    values_to = "freq" # collect the counts into `freq`
  )
freq1_long

# Bar chart of option counts ----
# vjust = 2 with white text draws each count label inside the top of its bar.
# theme_gray() is a complete theme, so no other theme call is needed before it.
# NOTE(review): "HiraKakuPro-W3" is presumably a macOS Japanese font; change
# base_family on systems where it is not installed.
p1 <- ggplot(freq1_long, aes(x = selection, y = freq)) +
  geom_col(fill = "steelblue") +
  geom_text(
    aes(label = freq),
    vjust = 2,
    hjust = 0.5,
    color = "white",
    size = 3.5
  ) +
  theme_gray(base_family = "HiraKakuPro-W3")
p1

# Count how often each answer combination appears ----
freq2 <- ddply(df, "answer", summarize, freq = length(answer))

# Fix the factor level order so bars are sorted by number of selected options.
freq2$answer <- factor(
  freq2$answer,
  levels = c("", "a", "b", "c", "ab", "ac", "bc", "abc")
)

# Bar chart of answer combinations ----
ggplot(freq2, aes(x = answer, y = freq, fill = answer)) +
  geom_col() +
  theme_bw()