1
- # READ ME: https://github.com/KonScience/Summarize-Flattr-Reports#summarize-flattr-reports
1
+ # Please read https://github.com/KonScience/Summarize-Flattr-Reports#summarize-flattr-reports
2
+
3
+ rm(list = ls()) # clean workspace
4
+ original_wd <- getwd() # save current working directory
5
+ Sys.setlocale(" LC_ALL" , " UTF-8" ) # respect non-ASCII symbols like German umlauts on Mac OSX, learned from https://stackoverflow.com/questions/8145886/
6
+ options(stringsAsFactors = FALSE , row.names = FALSE , limitsize = FALSE ) # set global options
2
7
3
- # load packages for data frame manipulation & diagram drawing
4
8
# see http://www.r-bloggers.com/library-vs-require-in-r/ for require() vs. library() discussion
5
9
library(scales )
6
10
library(ggplot2 )
7
11
library(plyr )
8
12
9
13
# get all filenames of Flattr Monthly Revenue CSV; assumes that all were downloaded into same folder
10
-
11
14
args <- commandArgs(trailingOnly = TRUE )
12
-
13
15
if (length(args ) == 0 ) { # execute via: Rscript path/to/summarize-flattr-reports.R path/to/flattr-revenue-000000.csv
14
16
print(" Please select one of the 'flattr-revenue-....csv' files from the folder you downloaded them to." )
15
17
first_flattr_file <- file.choose()
16
18
flattr_dir <- dirname(first_flattr_file ) # learned from http://stackoverflow.com/a/18003224
17
19
} else {
18
20
if ((substring(args [1 ], 1 , 1 ) == " /" ) || (substring(args [1 ], 2 , 2 ) == " :" )) {
19
21
flattr_dir <- dirname(args [1 ]) # set absolute directory by cli argument
20
- } else {
21
- flattr_dir <- dirname(file.path(getwd(), args [1 ], fsep = .Platform $ file.sep )) # set relative directory by cli argument
22
- }
22
+ } else {flattr_dir <- dirname(file.path(getwd(), args [1 ], fsep = .Platform $ file.sep ))} # set relative directory by cli argument
23
23
}
24
-
25
24
Flattr_filenames <- list.files(flattr_dir , pattern = " flattr-revenue-20[0-9]{4}.csv" )
26
-
27
- # move working directory to .csv files but save original
28
- original_wd <- getwd()
29
25
setwd(flattr_dir )
30
- options(stringsAsFactors = FALSE )
31
26
32
- # check for summary file of previously processed data & add new reports , instead of reading in every files again
27
+ # use summary file if available & create if not , instead of reading files individually
33
28
try(known_raw <- read.csv2(" flattr-revenue-000000.csv" , encoding = " UTF-8" ))
34
-
35
29
if (" flattr-revenue-000000.csv" %in% list.files(flattr_dir , pattern = " *.csv" )) {
36
30
# check for existing raw date & merge with new
37
31
if (length(unique(known_raw $ period )) < length(Flattr_filenames )) {
@@ -55,20 +49,14 @@ if ("flattr-revenue-000000.csv" %in% list.files(flattr_dir, pattern = "*.csv"))
55
49
encoding = " UTF-8" # learned from RTFM, but works only on Win7
56
50
)) # Function structure learned from https://stat.ethz.ch/pipermail/r-help/2010-October/255593.html
57
51
}} else {raw <- do.call(" rbind" , lapply(Flattr_filenames , read.csv2 , encoding = " UTF-8" ))} # same as inner else, just to catch edge case of repetive plotting without adding new Revenue Reports
58
-
59
- Sys.setlocale(" LC_ALL" , " UTF-8" ) # respect non-ASCII symbols like German umlauts on Mac OSX, learned from https://stackoverflow.com/questions/8145886/
60
-
61
- # export aggregated data for next (month's) run
62
- write.csv2(raw , " flattr-revenue-000000.csv" , row.names = FALSE )
52
+ write.csv2(x = raw , file = " flattr-revenue-000000.csv" )
63
53
64
54
# append 1st days to months & convert to date format; learned from http://stackoverflow.com/a/4594269
65
55
raw $ period <- as.Date(paste(raw $ period , " -01" ), format = " %Y-%m -%d" )
66
56
raw $ EUR_per_click <- raw $ revenue / raw $ clicks
67
57
68
58
# populate raw data with all_revenue for each thing
69
- for (i in 1 : dim(raw )[1 ]){
70
- raw $ all_revenue [i ] <- sum(subset(raw , title == raw $ title [i ])$ revenue )
71
- }
59
+ for (i in 1 : nrow(raw )){raw $ all_revenue [i ] <- sum(subset(raw , title == raw $ title [i ])$ revenue )}
72
60
73
61
# determine dataset size to auto-adjust plots
74
62
N_months <- length(Flattr_filenames )
@@ -81,21 +69,16 @@ per_thing <- ddply(.data = raw,
81
69
all_clicks = sum(clicks ),
82
70
all_revenue = sum(revenue ))
83
71
per_thing <- per_thing [order(per_thing $ all_revenue , decreasing = TRUE ),]
84
- rownames(per_thing ) <- NULL
85
- write.csv2(x = per_thing ,
86
- file = " flattr-revenue-things.csv" ,
87
- row.names = FALSE )
72
+ write.csv2(per_thing , " flattr-revenue-things.csv" )
88
73
89
74
# summarize & order by month and thing to provide click-value development over time
90
75
per_month_and_thing <- ddply(raw ,
91
76
c(" period" , " title" , " EUR_per_click" ),
92
- summarize , all_clicks = sum(clicks ),
77
+ summarize ,
78
+ all_clicks = sum(clicks ),
93
79
all_revenue = sum(revenue ))
94
80
per_month_and_thing <- per_month_and_thing [order(per_month_and_thing $ title ),]
95
- rownames(per_month_and_thing ) <- NULL
96
- write.csv2(per_month_and_thing ,
97
- " flattr-revenue-clicks.csv" ,
98
- row.names = FALSE )
81
+ write.csv2(per_month_and_thing , " flattr-revenue-clicks.csv" )
99
82
100
83
# summarize & export revenue per month
101
84
per_month <- ddply(raw ,
@@ -104,58 +87,63 @@ per_month <- ddply(raw,
104
87
all_clicks = sum(clicks ),
105
88
all_revenue = sum(revenue ))
106
89
per_month <- per_month [order(per_month $ period ),]
107
- write.csv2(per_month ,
108
- " flattr-revenue-months.csv" ,
109
- row.names = FALSE )
90
+ write.csv2(per_month , " flattr-revenue-months.csv" )
110
91
111
92
# revenue per click and month colored by thing, with trends for everything & best thing
112
93
best_thing <- subset(per_month_and_thing , title == per_thing [1 ,1 ]) # reduces data frame to best thing, for later trendline
113
- rownames(best_thing ) <- NULL
114
94
best_thing $ EUR_per_click <- best_thing $ all_revenue / best_thing $ all_clicks
115
95
116
- flattr_plot <- ggplot(data = raw , mapping = aes(x = period , y = EUR_per_click ,
117
- size = raw $ revenue , # points sized according to revenue of that thing in that month => bubble plot
118
- colour = factor (title ))) +
96
+ flattr_plot <- ggplot(data = raw ,
97
+ mapping = aes(x = period ,
98
+ y = EUR_per_click ,
99
+ size = raw $ revenue , # points sized according to revenue of that thing in that month => bubble plot
100
+ colour = factor (title ))) +
119
101
geom_jitter() + # same as geom_point(position = "jitter"); spreads data points randomly around true x value bit; day-exact resolution not (yet) possible
120
- labs(list (title = " Development of Flattr Revenue per Click" , x = NULL ,
121
- y = expression(" EUR per Flattr (extremes omitted)" ))) + # learned from http://docs.ggplot2.org/current/labs.html
122
- stat_smooth(mapping = aes(best_thing $ period , best_thing $ EUR_per_click , size = best_thing $ all_revenue ),
123
- data = best_thing , method = " auto" , show_guide = FALSE , size = N_months / 20 ,
102
+ labs(title = " Development of Flattr Revenue per Click" ,
103
+ x = NULL , y = expression(" EUR per Flattr (extremes omitted)" )) + # learned from http://docs.ggplot2.org/current/labs.html
104
+ stat_smooth(mapping = aes(best_thing $ period ,
105
+ best_thing $ EUR_per_click ,
106
+ size = best_thing $ all_revenue ),
107
+ data = best_thing ,
108
+ method = " auto" ,
109
+ show_guide = FALSE ,
110
+ size = N_months / 20 ,
124
111
se = FALSE , # confidence interval indicator
125
112
linetype = " dashed" ) + # learned from http://sape.inf.usi.ch/quick-reference/ggplot2/linetype
126
113
stat_smooth(aes(group = 1 ), # plots trendline over all values; otherwise: one for each thing; learned from http://stackoverflow.com/a/12810890
127
- method = " auto" , se = FALSE , color = " darkgrey" , show_guide = FALSE , size = N_months / 20 ) +
128
- scale_y_continuous(limits = c(0 , mean(raw $ EUR_per_click ) * 5 ), # omit y-values larger than 5x arithmetic mean learned from http://stackoverflow.com/a/26558070
114
+ method = " auto" ,
115
+ se = FALSE ,
116
+ color = " darkgrey" ,
117
+ show_guide = FALSE ,
118
+ size = N_months / 20 ) +
119
+ scale_y_continuous(limits = c(0 , mean(raw $ EUR_per_click ) * 5 ), # omit extreme y-values; learned from http://stackoverflow.com/a/26558070
129
120
expand = c(0 , 0 )) +
130
- theme_classic( ) +
131
- theme( legend.position = " none " )
121
+ theme( legend.position = " none " ) +
122
+ theme_classic( base_size = sqrt( N_things + N_months ) )
132
123
flattr_plot
133
- ggsave(" flattr-revenue-clicks.png" , flattr_plot , limitsize = FALSE )
124
+ ggsave(" flattr-revenue-clicks.png" , height = N_things / 3 , width = N_months / 1.5 )
134
125
135
126
# revenue per month and thing
136
- monthly_advanced_plot <- ggplot(per_month_and_thing , aes(x = period , y = all_revenue , fill = factor (title ))) +
127
+ monthly_advanced_plot <- ggplot(per_month_and_thing , aes(period , all_revenue , fill = factor (title ))) +
137
128
geom_bar(stat = " identity" ) +
138
- labs(list ( title = " Development of Flattr Revenue by Things" , x = NULL , y = " EUR received" ) ) +
129
+ labs(title = " Development of Flattr Revenue by Things" , x = NULL , y = " EUR received" ) +
139
130
scale_y_continuous(limits = c(0 , max(per_month $ all_revenue ) * 1.1 ), expand = c(0 , 0 )) +
140
131
scale_x_date(expand = c(0 , 0 )) +
141
- theme_classic( ) +
142
- theme( legend.position = " none " )
132
+ theme( legend.position = " none " ) +
133
+ theme_classic( base_size = ( N_things + N_months ) / 5 )
143
134
monthly_advanced_plot
144
- ggsave(" flattr-revenue-months.png" , monthly_advanced_plot , limitsize = FALSE )
135
+ ggsave(" flattr-revenue-months.png" , height = N_things / 3 , width = N_months / 1.5 )
145
136
146
137
# total revenue per month with trend
147
- monthly_simple_plot <- ggplot(data = per_month , aes(x = period , y = all_revenue )) +
138
+ monthly_simple_plot <- ggplot(per_month , aes(x = period , y = all_revenue )) +
148
139
geom_bar(stat = " identity" , group = 1 , fill = " #ED8C3B" ) +
149
- labs(list (title = " Development of Flattr Revenue" ,
150
- y = " EUR received" ,
151
- x = NULL )) +
152
- stat_smooth(data = per_month , method = " auto" , color = " #80B04A" , size = N_months / 5 ) + # fit trend plus confidence interval
153
- scale_y_continuous(limits = c(0 , max(per_month $ all_revenue ) * 1.1 ), # omit negative y-values & limit positive y-axis to 10% overhead over maximum value
154
- expand = c(0 , 0 )) +
140
+ labs(title = " Development of Flattr Revenue" , x = NULL , y = " EUR received" ) +
141
+ stat_smooth(data = per_month , method = " auto" , color = " #80B04A" , size = N_months / 5 ) + # fit trend plus confidence interval
142
+ scale_y_continuous(limits = c(0 , max(per_month $ all_revenue ) * 1.1 ), expand = c(0 , 0 )) +
155
143
scale_x_date(expand = c(0 , 0 )) +
156
- theme_classic()
144
+ theme_classic(base_size = ( N_things + N_months ) / 10 )
157
145
monthly_simple_plot
158
- ggsave(" flattr-revenue-months-summarized.png" , monthly_simple_plot , limitsize = FALSE )
146
+ ggsave(" flattr-revenue-months-summarized.png" )
159
147
160
148
161
149
# revenue per location of button
@@ -177,39 +165,29 @@ per_month_and_domain <- ddply(raw,
177
165
all_clicks = sum(clicks ),
178
166
all_revenue = sum(revenue ))
179
167
180
- monthly_domain_plot <- ggplot(per_month_and_domain , aes(x = period , y = all_revenue , fill = factor (domain ))) +
168
+ monthly_domain_plot <- ggplot(per_month_and_domain , aes(period , all_revenue , fill = factor (domain ))) +
181
169
geom_bar(stat = " identity" ) +
182
- labs(list (title = " Development of Flattr Revenue by Button Locations" ,
183
- y = " EUR received" ,
184
- x = NULL ,
185
- fill = " Domains" )) +
170
+ labs(title = " Development of Flattr Revenue by Button Locations" , x = NULL , y = " EUR received" , fill = " Domains" ) +
186
171
guides(fill = guide_legend(reverse = TRUE )) +
187
172
scale_x_date(expand = c(0 ,0 )) +
188
- scale_y_continuous(limits = c(0 , max(per_month $ all_revenue ) * 1.1 ),
189
- expand = c(0 , 0 )) +
173
+ scale_y_continuous(limits = c(0 , max(per_month $ all_revenue ) * 1.1 ), expand = c(0 , 0 )) +
190
174
scale_fill_brewer(type = " qual" ) +
191
- theme_classic()
192
- monthly_domain_plot
193
- ggsave(" flattr-revenue-months-domain.png" , monthly_domain_plot , limitsize = FALSE )
194
-
195
- monthly_domain_plot_fractions <- ggplot(per_month_and_domain ,
196
- aes(period , all_revenue , fill = factor (domain ))) +
197
- geom_bar(position = " fill" ,
198
- stat = " identity" ) +
199
- labs(list (title = " Fractions of Flattr Revenue by Button Locations" ,
200
- x = NULL , y = NULL ,
201
- fill = " Domains" )) +
175
+ theme_classic(base_size = (N_things + N_months ) / 10 )
176
+ monthly_domain_plot
177
+ ggsave(" flattr-revenue-months-domain.png" )
178
+
179
+ monthly_domain_plot_fractions <- ggplot(per_month_and_domain , aes(period , all_revenue , fill = factor (domain ))) +
180
+ geom_bar(position = " fill" , stat = " identity" ) +
181
+ labs(title = " Fractions of Flattr Revenue by Button Locations" ,
182
+ x = NULL , y = NULL , fill = " Domains" ) +
202
183
guides(fill = guide_legend(reverse = TRUE )) +
203
184
scale_x_date(expand = c(0 ,0 )) +
204
185
scale_y_continuous(expand = c(0 , 0 )) +
205
186
scale_fill_brewer(type = " qual" ) +
206
- theme_classic()
187
+ theme_classic(base_size = ( N_things + N_months ) / 10 )
207
188
monthly_domain_plot_fractions
208
- ggsave(" flattr-revenue-months-domain-fractions.png" , monthly_domain_plot , limitsize = FALSE )
189
+ ggsave(" flattr-revenue-months-domain-fractions.png" )
209
190
210
191
# sort & export after plotting in order to preserve alphabatic sorting in of domains in plot
211
192
per_month_and_domain <- per_month_and_domain [order(per_month_and_domain $ all_revenue ),]
212
- rownames(per_month_and_domain ) <- NULL
213
- write.table(per_month_and_domain ,
214
- " flattr-revenue-clicks-domain.csv" ,
215
- row.names = FALSE )
193
+ write.csv2(per_month_and_domain , " flattr-revenue-clicks-domain.csv" )
0 commit comments