Skip to content

Commit b93e8dd

Browse files
Merge pull request #23 from KonScience/develop
restyle code
2 parents dda8377 + 8bef760 commit b93e8dd

File tree

1 file changed

+63
-85
lines changed

1 file changed

+63
-85
lines changed

summarize-flattr-reports.R

Lines changed: 63 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,31 @@
1-
# READ ME: https://github.com/KonScience/Summarize-Flattr-Reports#summarize-flattr-reports
1+
# Please read https://github.com/KonScience/Summarize-Flattr-Reports#summarize-flattr-reports
2+
3+
rm(list = ls()) # clean workspace
4+
original_wd <- getwd() # save current working directory
5+
Sys.setlocale("LC_ALL", "UTF-8") # respect non-ASCII symbols like German umlauts on Mac OSX, learned from https://stackoverflow.com/questions/8145886/
6+
options(stringsAsFactors = FALSE, row.names = FALSE, limitsize = FALSE) # set global options
27

3-
# load packages for data frame manipulation & diagram drawing
48
# see http://www.r-bloggers.com/library-vs-require-in-r/ for require() vs. library() discussion
59
library(scales)
610
library(ggplot2)
711
library(plyr)
812

913
# get all filenames of Flattr Monthly Revenue CSV; assumes that all were downloaded into same folder
10-
1114
args <- commandArgs(trailingOnly = TRUE)
12-
1315
if (length(args) == 0) { # execute via: Rscript path/to/summarize-flattr-reports.R path/to/flattr-revenue-000000.csv
1416
print("Please select one of the 'flattr-revenue-....csv' files from the folder you downloaded them to.")
1517
first_flattr_file <- file.choose()
1618
flattr_dir <- dirname(first_flattr_file) # learned from http://stackoverflow.com/a/18003224
1719
} else {
1820
if ((substring(args[1], 1, 1) == "/") || (substring(args[1], 2, 2) == ":")) {
1921
flattr_dir <- dirname(args[1]) # set absolute directory by cli argument
20-
} else {
21-
flattr_dir <- dirname(file.path(getwd(), args[1], fsep = .Platform$file.sep)) # set relative directory by cli argument
22-
}
22+
} else {flattr_dir <- dirname(file.path(getwd(), args[1], fsep = .Platform$file.sep))} # set relative directory by cli argument
2323
}
24-
2524
Flattr_filenames <- list.files(flattr_dir, pattern = "flattr-revenue-20[0-9]{4}.csv")
26-
27-
# move working directory to .csv files but save original
28-
original_wd <- getwd()
2925
setwd(flattr_dir)
30-
options(stringsAsFactors = FALSE)
3126

32-
# check for summary file of previously processed data & add new reports, instead of reading in every files again
27+
# use summary file if available & create if not, instead of reading files individually
3328
try(known_raw <- read.csv2("flattr-revenue-000000.csv", encoding = "UTF-8"))
34-
3529
if ("flattr-revenue-000000.csv" %in% list.files(flattr_dir, pattern = "*.csv")) {
3630
# check for existing raw data & merge with new
3731
if (length(unique(known_raw$period)) < length(Flattr_filenames)) {
@@ -55,20 +49,14 @@ if ("flattr-revenue-000000.csv" %in% list.files(flattr_dir, pattern = "*.csv"))
5549
encoding = "UTF-8" # learned from RTFM, but works only on Win7
5650
)) # Function structure learned from https://stat.ethz.ch/pipermail/r-help/2010-October/255593.html
5751
}} else {raw <- do.call("rbind", lapply(Flattr_filenames, read.csv2, encoding = "UTF-8"))} # same as inner else, just to catch edge case of repetive plotting without adding new Revenue Reports
58-
59-
Sys.setlocale("LC_ALL", "UTF-8") # respect non-ASCII symbols like German umlauts on Mac OSX, learned from https://stackoverflow.com/questions/8145886/
60-
61-
# export aggregated data for next (month's) run
62-
write.csv2(raw, "flattr-revenue-000000.csv", row.names = FALSE)
52+
write.csv2(x = raw, file = "flattr-revenue-000000.csv")
6353

6454
# append 1st days to months & convert to date format; learned from http://stackoverflow.com/a/4594269
6555
raw$period <- as.Date(paste(raw$period, "-01"), format="%Y-%m -%d")
6656
raw$EUR_per_click <- raw$revenue / raw$clicks
6757

6858
# populate raw data with all_revenue for each thing
69-
for (i in 1:dim(raw)[1]){
70-
raw$all_revenue[i] <- sum(subset(raw, title == raw$title[i])$revenue)
71-
}
59+
for (i in 1:nrow(raw)){raw$all_revenue[i] <- sum(subset(raw, title == raw$title[i])$revenue)}
7260

7361
# determine dataset size to auto-adjust plots
7462
N_months <- length(Flattr_filenames)
@@ -81,21 +69,16 @@ per_thing <- ddply(.data = raw,
8169
all_clicks = sum(clicks),
8270
all_revenue = sum(revenue))
8371
per_thing <- per_thing[order(per_thing$all_revenue, decreasing = TRUE),]
84-
rownames(per_thing) <- NULL
85-
write.csv2(x = per_thing,
86-
file = "flattr-revenue-things.csv",
87-
row.names = FALSE)
72+
write.csv2(per_thing, "flattr-revenue-things.csv")
8873

8974
# summarize & order by month and thing to provide click-value development over time
9075
per_month_and_thing <- ddply(raw,
9176
c("period", "title", "EUR_per_click"),
92-
summarize, all_clicks = sum(clicks),
77+
summarize,
78+
all_clicks = sum(clicks),
9379
all_revenue = sum(revenue))
9480
per_month_and_thing <- per_month_and_thing[order(per_month_and_thing$title),]
95-
rownames(per_month_and_thing) <- NULL
96-
write.csv2(per_month_and_thing,
97-
"flattr-revenue-clicks.csv",
98-
row.names = FALSE)
81+
write.csv2(per_month_and_thing, "flattr-revenue-clicks.csv")
9982

10083
# summarize & export revenue per month
10184
per_month <- ddply(raw,
@@ -104,58 +87,63 @@ per_month <- ddply(raw,
10487
all_clicks = sum(clicks),
10588
all_revenue = sum(revenue))
10689
per_month <- per_month[order(per_month$period),]
107-
write.csv2(per_month,
108-
"flattr-revenue-months.csv",
109-
row.names = FALSE)
90+
write.csv2(per_month, "flattr-revenue-months.csv")
11091

11192
# revenue per click and month colored by thing, with trends for everything & best thing
11293
best_thing <- subset(per_month_and_thing, title == per_thing[1,1]) # reduces data frame to best thing, for later trendline
113-
rownames(best_thing) <- NULL
11494
best_thing$EUR_per_click <- best_thing$all_revenue / best_thing$all_clicks
11595

116-
flattr_plot <- ggplot(data = raw, mapping = aes(x = period, y = EUR_per_click,
117-
size = raw$revenue, # points sized according to revenue of that thing in that month => bubble plot
118-
colour = factor(title))) +
96+
flattr_plot <- ggplot(data = raw,
97+
mapping = aes(x = period,
98+
y = EUR_per_click,
99+
size = raw$revenue, # points sized according to revenue of that thing in that month => bubble plot
100+
colour = factor(title))) +
119101
geom_jitter() + # same as geom_point(position = "jitter"); spreads data points randomly around true x value bit; day-exact resolution not (yet) possible
120-
labs(list(title = "Development of Flattr Revenue per Click", x = NULL,
121-
y = expression("EUR per Flattr (extremes omitted)"))) + # learned from http://docs.ggplot2.org/current/labs.html
122-
stat_smooth(mapping = aes(best_thing$period, best_thing$EUR_per_click, size = best_thing$all_revenue),
123-
data = best_thing, method = "auto", show_guide = FALSE, size = N_months / 20,
102+
labs(title = "Development of Flattr Revenue per Click",
103+
x = NULL, y = expression("EUR per Flattr (extremes omitted)")) + # learned from http://docs.ggplot2.org/current/labs.html
104+
stat_smooth(mapping = aes(best_thing$period,
105+
best_thing$EUR_per_click,
106+
size = best_thing$all_revenue),
107+
data = best_thing,
108+
method = "auto",
109+
show_guide = FALSE,
110+
size = N_months / 20,
124111
se = FALSE, # confidence interval indicator
125112
linetype = "dashed") + # learned from http://sape.inf.usi.ch/quick-reference/ggplot2/linetype
126113
stat_smooth(aes(group = 1), # plots trendline over all values; otherwise: one for each thing; learned from http://stackoverflow.com/a/12810890
127-
method = "auto", se = FALSE, color = "darkgrey", show_guide = FALSE, size = N_months / 20) +
128-
scale_y_continuous(limits = c(0, mean(raw$EUR_per_click) * 5), # omit y-values larger than 5x arithmetic mean learned from http://stackoverflow.com/a/26558070
114+
method = "auto",
115+
se = FALSE,
116+
color = "darkgrey",
117+
show_guide = FALSE,
118+
size = N_months / 20) +
119+
scale_y_continuous(limits = c(0, mean(raw$EUR_per_click) * 5), # omit extreme y-values; learned from http://stackoverflow.com/a/26558070
129120
expand = c(0, 0)) +
130-
theme_classic() +
131-
theme(legend.position = "none")
121+
theme(legend.position = "none") +
122+
theme_classic(base_size = sqrt(N_things + N_months))
132123
flattr_plot
133-
ggsave("flattr-revenue-clicks.png", flattr_plot, limitsize = FALSE)
124+
ggsave("flattr-revenue-clicks.png", height = N_things/3, width = N_months/1.5)
134125

135126
# revenue per month and thing
136-
monthly_advanced_plot <- ggplot(per_month_and_thing, aes(x = period, y = all_revenue, fill = factor(title))) +
127+
monthly_advanced_plot <- ggplot(per_month_and_thing, aes(period, all_revenue, fill = factor(title))) +
137128
geom_bar(stat = "identity") +
138-
labs(list(title = "Development of Flattr Revenue by Things", x = NULL, y = "EUR received")) +
129+
labs(title = "Development of Flattr Revenue by Things", x = NULL, y = "EUR received") +
139130
scale_y_continuous(limits = c(0, max(per_month$all_revenue) * 1.1), expand = c(0, 0)) +
140131
scale_x_date(expand = c(0, 0)) +
141-
theme_classic() +
142-
theme(legend.position = "none")
132+
theme(legend.position = "none") +
133+
theme_classic(base_size = (N_things + N_months) / 5)
143134
monthly_advanced_plot
144-
ggsave("flattr-revenue-months.png", monthly_advanced_plot, limitsize = FALSE)
135+
ggsave("flattr-revenue-months.png", height = N_things/3, width = N_months/1.5)
145136

146137
# total revenue per month with trend
147-
monthly_simple_plot <- ggplot(data = per_month, aes(x = period, y = all_revenue)) +
138+
monthly_simple_plot <- ggplot(per_month, aes(x = period, y = all_revenue)) +
148139
geom_bar(stat = "identity", group = 1, fill = "#ED8C3B") +
149-
labs(list(title = "Development of Flattr Revenue",
150-
y = "EUR received",
151-
x = NULL)) +
152-
stat_smooth(data = per_month, method = "auto", color = "#80B04A", size = N_months / 5) + # fit trend plus confidence interval
153-
scale_y_continuous(limits = c(0, max(per_month$all_revenue) * 1.1), # omit negative y-values & limit positive y-axis to 10% overhead over maximum value
154-
expand = c(0, 0)) +
140+
labs(title = "Development of Flattr Revenue", x = NULL, y = "EUR received") +
141+
stat_smooth(data = per_month, method = "auto", color = "#80B04A", size = N_months/5) + # fit trend plus confidence interval
142+
scale_y_continuous(limits = c(0, max(per_month$all_revenue) * 1.1), expand = c(0, 0)) +
155143
scale_x_date(expand = c(0, 0)) +
156-
theme_classic()
144+
theme_classic(base_size = (N_things + N_months) / 10)
157145
monthly_simple_plot
158-
ggsave("flattr-revenue-months-summarized.png", monthly_simple_plot, limitsize = FALSE)
146+
ggsave("flattr-revenue-months-summarized.png")
159147

160148

161149
# revenue per location of button
@@ -177,39 +165,29 @@ per_month_and_domain <- ddply(raw,
177165
all_clicks = sum(clicks),
178166
all_revenue = sum(revenue))
179167

180-
monthly_domain_plot <- ggplot(per_month_and_domain, aes(x = period, y = all_revenue, fill = factor(domain))) +
168+
monthly_domain_plot <- ggplot(per_month_and_domain, aes(period, all_revenue, fill = factor(domain))) +
181169
geom_bar(stat = "identity") +
182-
labs(list(title = "Development of Flattr Revenue by Button Locations",
183-
y = "EUR received",
184-
x = NULL,
185-
fill = "Domains")) +
170+
labs(title = "Development of Flattr Revenue by Button Locations", x = NULL, y = "EUR received", fill = "Domains") +
186171
guides(fill = guide_legend(reverse = TRUE)) +
187172
scale_x_date(expand = c(0,0)) +
188-
scale_y_continuous(limits = c(0, max(per_month$all_revenue) * 1.1),
189-
expand = c(0, 0)) +
173+
scale_y_continuous(limits = c(0, max(per_month$all_revenue) * 1.1), expand = c(0, 0)) +
190174
scale_fill_brewer(type = "qual") +
191-
theme_classic()
192-
monthly_domain_plot
193-
ggsave("flattr-revenue-months-domain.png", monthly_domain_plot, limitsize = FALSE)
194-
195-
monthly_domain_plot_fractions <- ggplot(per_month_and_domain,
196-
aes(period, all_revenue, fill = factor(domain))) +
197-
geom_bar(position = "fill",
198-
stat = "identity") +
199-
labs(list(title = "Fractions of Flattr Revenue by Button Locations",
200-
x = NULL, y = NULL,
201-
fill = "Domains")) +
175+
theme_classic(base_size = (N_things + N_months) / 10)
176+
monthly_domain_plot
177+
ggsave("flattr-revenue-months-domain.png")
178+
179+
monthly_domain_plot_fractions <- ggplot(per_month_and_domain, aes(period, all_revenue, fill = factor(domain))) +
180+
geom_bar(position = "fill", stat = "identity") +
181+
labs(title = "Fractions of Flattr Revenue by Button Locations",
182+
x = NULL, y = NULL, fill = "Domains") +
202183
guides(fill = guide_legend(reverse = TRUE)) +
203184
scale_x_date(expand = c(0,0)) +
204185
scale_y_continuous(expand = c(0, 0)) +
205186
scale_fill_brewer(type = "qual") +
206-
theme_classic()
187+
theme_classic(base_size = (N_things + N_months) / 10)
207188
monthly_domain_plot_fractions
208-
ggsave("flattr-revenue-months-domain-fractions.png", monthly_domain_plot, limitsize = FALSE)
189+
ggsave("flattr-revenue-months-domain-fractions.png")
209190

210191
# sort & export after plotting in order to preserve alphabetic sorting of domains in plot
211192
per_month_and_domain <- per_month_and_domain[order(per_month_and_domain$all_revenue),]
212-
rownames(per_month_and_domain) <- NULL
213-
write.table(per_month_and_domain,
214-
"flattr-revenue-clicks-domain.csv",
215-
row.names = FALSE)
193+
write.csv2(per_month_and_domain, "flattr-revenue-clicks-domain.csv")

0 commit comments

Comments
 (0)