vignettes/sparks-anu-graphs-for-open-science-presentation.Rmd
sparks-anu-graphs-for-open-science-presentation.Rmd
# Read the Google Sheet at this URL into `rrpp`; the plots below that take
# `rrpp` as their data argument all draw from this table.
rrpp <-
  gsheet::gsheet2tbl(
    url = "https://docs.google.com/spreadsheets/d/19gXobV4oPZeWZiQJAPNIrmqpfGQtpapXWcSxaXRw1-M/edit#gid=1699540381"
  )
# Bar chart counting the rows of `rrpp` in each `art_class`.
# Every bar is filled by its own class, so a fill legend would only repeat the
# axis labels; it is switched off at the end of the chain.
ggplot(rrpp, aes(x = art_class)) +
  geom_bar(stat = "count",
           aes(fill = art_class)) +
  xlab("Class(es)") +
  scale_fill_few() +
  ggtitle("Article Classification") +
  coord_flip() +
  theme_ipsum_rc() +
  # `guides(fill = FALSE)` is deprecated as of ggplot2 3.3.4; "none" is the
  # supported way to suppress a guide.
  guides(fill = "none")
# Stacked bar chart: rows of `rrpp` counted per journal `abbreviation`, with
# each bar split by `art_class`.
ggplot(data = rrpp, mapping = aes(x = abbreviation, fill = art_class)) +
  geom_bar() +
  labs(title = "Journals Sampled", x = "Journal Abbreviation") +
  scale_fill_few(name = "Article Class") +
  coord_flip() +
  theme_ipsum_rc()
# Stacked bar chart: rows of `rrpp` counted per `comp_mthds_avail` value, with
# each bar split by `art_class`.
ggplot(data = rrpp, mapping = aes(x = comp_mthds_avail, fill = art_class)) +
  geom_bar() +
  labs(title = "Computational Methods Availability", x = "Score") +
  scale_fill_few(name = "Article Class") +
  coord_flip() +
  theme_ipsum_rc()
#> Warning: Removed 6 rows containing non-finite values (stat_count).
Was the software used readily available and preferably open-source?
# Stacked bar chart: rows of `rrpp` counted per `software_avail` value, with
# each bar split by `art_class`.
ggplot(data = rrpp, mapping = aes(x = software_avail, fill = art_class)) +
  geom_bar() +
  labs(title = "Software Availability", x = "Score") +
  scale_fill_few(name = "Article Class") +
  coord_flip() +
  theme_ipsum_rc()
#> Warning: Removed 3 rows containing non-finite values (stat_count).
Was the data made available?
# Stacked bar chart: rows of `rrpp` counted per `data_avail` value, with each
# bar split by `art_class`.
ggplot(data = rrpp, mapping = aes(x = data_avail, fill = art_class)) +
  geom_bar() +
  labs(title = "Data Availability", x = "Score") +
  scale_fill_few(name = "Article Class") +
  coord_flip() +
  theme_ipsum_rc()
Was the software that was used properly cited?
# Stacked bar chart: rows of `rrpp` counted per `software_cite` value, with
# each bar split by `art_class`.
ggplot(data = rrpp, mapping = aes(x = software_cite, fill = art_class)) +
  geom_bar() +
  labs(title = "Software Cited", x = "Score") +
  scale_fill_few(name = "Article Class") +
  coord_flip() +
  theme_ipsum_rc()
#> Warning: Removed 3 rows containing non-finite values (stat_count).
Count and sort the top 10 software packages cited (ties extend the list beyond ten entries). Other packages were likely used as well, but the authors did not properly identify them.
# Tally how often each `software_used` value occurs, then order the tally from
# least to most frequent.
tab <- table(rrpp_software$software_used)
tab_s <- sort(tab)

# Take the last 17 names of the sorted tally (the most frequent ones); more
# than 10 are kept because the table contains several ties.
top10 <- tail(names(tab_s), n = 17)

# Restrict the data to those packages and fix the factor level order to the
# reverse of `top10`, which controls the order of the bars in the plot.
top_software <-
  rrpp_software[rrpp_software$software_used %in% top10, , drop = FALSE]
top_software$software_used <- factor(
  top_software$software_used,
  levels = rev(top10)
)

# Stacked bar chart of usage counts per package, split by `art_class`.
ggplot(data = top_software,
       mapping = aes(x = software_used, fill = art_class)) +
  geom_bar() +
  labs(title = "Top 10 Software Used", x = "Software", y = "Count") +
  scale_fill_few(name = "Article Class") +
  coord_flip() +
  theme_ipsum_rc()
The reproducibility score was calculated as the sum of the scores for computational method availability,
# Stacked bar chart: rows of `rrpp` counted per `reproducibility_score` value,
# with each bar split by `art_class`.
ggplot(data = rrpp,
       mapping = aes(x = reproducibility_score, fill = art_class)) +
  geom_bar() +
  labs(title = "Combined Reproducibility Score", x = "Score") +
  scale_fill_few(name = "Article Class") +
  coord_flip() +
  theme_ipsum_rc()
#> Warning: Removed 2 rows containing non-finite values (stat_count).
# `reproducibility_score` by `art_class`, drawn as three layers per class:
# a flat violin nudged off the axis position, jittered raw points, and a
# narrow boxplot. Fill and outline colour both follow `art_class`.
# NOTE(review): `geom_flat_violin()` is not defined in this section; it is
# assumed to be provided elsewhere in the document or by an attached package.
ggplot(
  rrpp,
  aes(
    fill = art_class,
    colour = art_class,
    x = art_class,
    y = reproducibility_score
  )
) +
  geom_flat_violin(position = position_nudge(x = .2, y = 0)) +
  geom_jitter(width = .15,
              size = .5) +
  # Boxplot outliers are hidden; the jitter layer already plots every
  # observation.
  geom_boxplot(width = 0.1,
               outlier.shape = NA,
               color = "#666666") +
  ggtitle("Article Class Effect on Reproducibility") +
  xlab("Class(es)") +
  ylab("Score") +
  scale_fill_few(name = "Article Class") +
  scale_colour_few(name = "Article Class") +
  coord_flip() +
  theme_ipsum_rc() +
  # `guides(<aes> = FALSE)` is deprecated as of ggplot2 3.3.4; "none" is the
  # supported way to suppress a guide.
  guides(fill = "none", colour = "none")
#> Warning: Removed 2 rows containing non-finite values (stat_ydensity).
#> Warning: Removed 2 rows containing non-finite values (stat_boxplot).
#> Warning: Removed 2 rows containing missing values (geom_point).
# Boxplot of `reproducibility_score` for each journal `abbreviation`.
# Fill and outline are fixed colours here (nothing is mapped to fill/colour),
# so the fill/colour scales and guides below have nothing to act on; they are
# kept to mirror the other plots in this document.
ggplot(rrpp, aes(y = reproducibility_score,
                 x = abbreviation)) +
  geom_boxplot(fill = "#5da5da",
               colour = "#666666") +
  ggtitle("Journal Effect on Reproducibility") +
  xlab("Journal") +
  ylab("Score") +
  scale_fill_few(name = "Article Class") +
  scale_colour_few(name = "Article Class") +
  coord_flip() +
  theme_ipsum_rc() +
  # `guides(<aes> = FALSE)` is deprecated as of ggplot2 3.3.4; "none" is the
  # supported way to suppress a guide.
  guides(fill = "none", colour = "none")
#> Warning: Removed 2 rows containing non-finite values (stat_boxplot).
# `reproducibility_score` by `assignee`, drawn as three layers per evaluator:
# a flat violin nudged off the axis position, jittered raw points, and a
# narrow boxplot. All layers use fixed colours; nothing is mapped to
# fill/colour.
# NOTE(review): `geom_flat_violin()` is not defined in this section; it is
# assumed to be provided elsewhere in the document or by an attached package.
ggplot(rrpp, aes(y = reproducibility_score,
                 x = assignee)) +
  geom_flat_violin(position = position_nudge(x = .2, y = 0),
                   colour = "#5da5da",
                   fill = "#5da5da") +
  geom_jitter(
    width = .15,
    size = .5,
    colour = "#5da5da",
    fill = "#5da5da"
  ) +
  # Boxplot outliers are hidden; the jitter layer already plots every
  # observation.
  geom_boxplot(width = 0.1,
               outlier.shape = NA,
               color = "#666666") +
  ggtitle("Evaluator Effect on Reproducibility Score") +
  xlab("Evaluator") +
  ylab("Score") +
  scale_fill_few(name = "Article Class") +
  scale_colour_few(name = "Article Class") +
  coord_flip() +
  theme_ipsum_rc() +
  # `guides(<aes> = FALSE)` is deprecated as of ggplot2 3.3.4; "none" is the
  # supported way to suppress a guide.
  guides(fill = "none", colour = "none")
#> Warning: Removed 2 rows containing non-finite values (stat_ydensity).
#> Warning: Removed 2 rows containing non-finite values (stat_boxplot).
#> Warning: Removed 2 rows containing missing values (geom_point).
# Boxplot of `reproducibility_score` grouped by `IF_5year`, rounded to one
# decimal place and converted to a factor so each rounded value gets its own
# box. Fill and outline are fixed colours; nothing is mapped to fill/colour.
ggplot(rrpp, aes(y = reproducibility_score,
                 x = as.factor(round(IF_5year, 1)))) +
  geom_boxplot(fill = "#5da5da",
               colour = "#666666") +
  ggtitle("Five Year IF Effect on Reproducibility Score") +
  xlab("Impact Factor") +
  ylab("Score") +
  scale_fill_few(name = "Article Class") +
  scale_colour_few(name = "Article Class") +
  coord_flip() +
  theme_ipsum_rc() +
  # `guides(<aes> = FALSE)` is deprecated as of ggplot2 3.3.4; "none" is the
  # supported way to suppress a guide.
  guides(fill = "none", colour = "none")
#> Warning: Removed 2 rows containing non-finite values (stat_boxplot).
# `reproducibility_score` by the `open` column, drawn as three layers per
# group: a flat violin nudged off the axis position, jittered raw points, and
# a narrow boxplot. All layers use fixed colours; nothing is mapped to
# fill/colour.
# NOTE(review): `geom_flat_violin()` is not defined in this section; it is
# assumed to be provided elsewhere in the document or by an attached package.
ggplot(rrpp,
       aes(
         y = reproducibility_score,
         x = open)) +
  geom_flat_violin(
    position = position_nudge(x = .2, y = 0),
    fill = "#5da5da",
    colour = "#5da5da"
  ) +
  geom_jitter(
    width = .15,
    size = .5,
    fill = "#5da5da",
    colour = "#5da5da"
  ) +
  # Boxplot outliers are hidden; the jitter layer already plots every
  # observation.
  geom_boxplot(width = 0.1,
               outlier.shape = NA,
               color = "#666666") +
  ggtitle("Open Access Effect on Reproducibility Score") +
  # The x aesthetic is `open`, not an impact factor; the previous label
  # "Impact Factor" was carried over from the impact-factor plot.
  xlab("Open Access") +
  ylab("Score") +
  scale_fill_few(name = "Article Class") +
  scale_colour_few(name = "Article Class") +
  coord_flip() +
  theme_ipsum_rc() +
  # `guides(<aes> = FALSE)` is deprecated as of ggplot2 3.3.4; "none" is the
  # supported way to suppress a guide.
  guides(fill = "none", colour = "none")
#> Warning: Removed 2 rows containing non-finite values (stat_ydensity).
#> Warning: Removed 2 rows containing non-finite values (stat_boxplot).
#> Warning: Removed 2 rows containing missing values (geom_point).
# `reproducibility_score` by `repro_inst` (converted to a factor so each value
# is its own group), drawn as three layers per group: a flat violin nudged off
# the axis position, jittered raw points, and a narrow boxplot. All layers use
# fixed colours; nothing is mapped to fill/colour.
# NOTE(review): `geom_flat_violin()` is not defined in this section; it is
# assumed to be provided elsewhere in the document or by an attached package.
ggplot(rrpp,
       aes(y = reproducibility_score,
           x = as.factor(repro_inst))) +
  geom_flat_violin(
    position = position_nudge(x = .2, y = 0),
    fill = "#5da5da",
    colour = "#5da5da"
  ) +
  geom_jitter(
    width = .15,
    size = .5,
    fill = "#5da5da",
    colour = "#5da5da"
  ) +
  # Boxplot outliers are hidden; the jitter layer already plots every
  # observation.
  geom_boxplot(width = 0.1,
               outlier.shape = NA,
               color = "#666666") +
  ggtitle("Reproducibility Instructions Effect on Reproducibility Score") +
  xlab("Journal Reproducibility Instructions Score") +
  ylab("Article Score") +
  scale_fill_few(name = "Article Class") +
  scale_colour_few(name = "Article Class") +
  coord_flip() +
  theme_ipsum_rc() +
  # `guides(<aes> = FALSE)` is deprecated as of ggplot2 3.3.4; "none" is the
  # supported way to suppress a guide.
  guides(fill = "none", colour = "none")
#> Warning: Removed 2 rows containing non-finite values (stat_ydensity).
#> Warning: Removed 2 rows containing non-finite values (stat_boxplot).
#> Warning: Removed 2 rows containing missing values (geom_point).