citationexchange
Code to accompany discussion of "Statistical Modelling of Citation Exchange Between Statistics Journals" by Cristiano Varin, Manuela Cattelan and David Firth
Science Score: 18.0%
This score indicates how likely this project is to be science-related based on various indicators:
-
✓CITATION.cff file
Found CITATION.cff file -
○codemeta.json file
-
○.zenodo.json file
-
○DOI references
-
○Academic publication links
-
○Academic email domains
-
○Institutional organization owner
-
○JOSS paper metadata
-
○Scientific vocabulary similarity
Low similarity (1.0%) to scientific vocabulary
Last synced: 10 months ago
·
JSON representation
·
Repository
Code to accompany discussion of "Statistical Modelling of Citation Exchange Between Statistics Journals" by Cristiano Varin, Manuela Cattelan and David Firth
Basic Info
- Host: GitHub
- Owner: jcarlen
- Language: TeX
- Default Branch: master
- Size: 1.6 MB
Statistics
- Stars: 0
- Watchers: 2
- Forks: 0
- Open Issues: 0
- Releases: 0
Created almost 11 years ago
· Last pushed over 10 years ago
Metadata Files
Readme
Citation
README.md
CitationExchange
Code to accompany discussion of "Statistical Modelling of Citation Exchange Between Statistics Journals" by Cristiano Varin, Manuela Cattelan and David Firth
Owner
- Name: jcarlen
- Login: jcarlen
- Kind: user
- Website: https://jcarlen.github.io/
- Repositories: 18
- Profile: https://github.com/jcarlen
Senior Data Scientist, Los Angeles Times
Citation (CitationExchange_Code.R)
# Code from Carlen and Handcock's "Discussion on 'Statistical Modelling of Citation Exchange Between Statistics
# Journals’ by Cristiano Varin, Manuela Cattelan and David Firth"
------------------------------------------------------------------------------
# Pre-Processing -------
library(latentnet)
library(ergm.count)
library(stats)
download.file("http://cristianovarin.weebly.com/uploads/1/5/1/5/15156956/varin_cattelan_firth_supplement.zip", "varin_cattelan_firth_supplement.zip")
unzip("varin_cattelan_firth_supplement.zip")
Cmatrix <- as.matrix(read.csv("Varin_Cattelan_Firth_supplement/Data/cross-citation-matrix.csv", row.names = 1))
#as valued net (#see Modeling valued networks with statnet paper)
#removes self-citations; Stigler method also excluded self-citations
Cnet = as.network(t(Cmatrix), directed=T, matrix.type="a", ignore.eval=F, names.eval="citations")
# Pre-Processing 2: Code from from Varin_Cattelan_Firth_supplement.R published with article -------
## From Section 1 & 2
journal.abbr <- rownames(Cmatrix)
Tmatrix <- Cmatrix + t(Cmatrix)
diag(Tmatrix) <- diag(Cmatrix)
journals.cluster <- hclust(d = as.dist(1 - cor(Tmatrix)))
## From Section 3: Quasi-Stigler Model
library(BradleyTerry2)
Cdata <- countsToBinomial(Cmatrix)
fit <- BTm(outcome = cbind(win1, win2), player1 = player1, player2 = player2, data = Cdata)
npairs <- NROW(Cdata)
njournals <- nlevels(Cdata$player1)
phi <- sum(residuals(fit, "pearson")^2) / (npairs - (njournals - 1))
### "3.1 Journal residuals"
journal.res <- rep(NA, njournals)
res <- residuals(fit, type = "pearson")
coefs <- c(0, coef(fit)) # 0 is the coefficient of the first journal
for(i in 1:njournals){
A <- which(Cdata$player1 == journal.abbr[i])
B <- which(Cdata$player2 == journal.abbr[i])
y <- c(res[A], -res[B])
x <- c(-coefs[Cdata$player2[A]], -coefs[Cdata$player1[B]])
journal.res[i] <- sum(y * x) / sqrt(phi * sum(x ^ 2))
}
names(journal.res) <- journal.abbr
library(qvcalc)
cov.matrix <- matrix(0, nrow = njournals, ncol = njournals)
cov.matrix[-1, -1] <- vcov(fit)
qse <- qvcalc(phi * cov.matrix , estimates = c(0, coef(fit)),
labels = journal.abbr)
export.scores <- qse$qvframe$estimate
export.scores <- export.scores - mean(export.scores)
names(export.scores) <- journal.abbr
sort.id <- sort(export.scores, decreasing = TRUE, index.return = TRUE)$ix
fit.table <- data.frame(quasi = export.scores[sort.id], qse = qse$qvframe$quasiSE[sort.id])
## Some post-processing/alphabetizing (not from author supplement)
rownames(fit.table)
match(rownames(fit.table),Cnet%v%"vertex.names")
fit.table2 = fit.table[order(match(rownames(fit.table),Cnet%v%"vertex.names")),]
# Network model 1: Sender-receiver model with citation counts ~ Poisson -------
latent.srp1 = ergmm(Cnet~ sender(base = 0) + receiver(base = 0) - 1, response = "citations",
family = "Poisson.log", control = control.ergmm(burnin = 40000), seed = 123)
# Correlation of model with Stigler (0.95)
cor(latent.srp1$mkl$beta[48:94] - latent.srp1$mkl$beta[1:47], fit.table2$quasi)
# Network model 2: Two-dimensional latent-space model -------
# (this model takes a while to run > 1hr)
latent.srp2 = ergmm(Cnet~euclidean(d = 2) + sender(base = 0) + receiver(base = 0) - 1, response = "citations",
family = "Poisson.log", seed = 123,
control = ergmm.control(interval = 200, sample.size = 10000, burnin = 100000))
# Correlation of output with Stigler (0.99)
cor(latent.srp2$mkl$beta[48:94] - latent.srp2$mkl$beta[1:47], fit.table2$quasi)
# Network Plot 1 (Fig. 12a) -------
# slightly different/rotated than comment version. Changed some control settings.
vc2 = (latent.srp2$mkl$beta[48:94] - latent.srp2$mkl$beta[1:47])/2+1
plot(latent.srp2, labels = T, cex=.7, label.cex=.5,
plot.vars=F, vertex.col = cutree(journals.cluster, h = 0.6)+3,
edge.col=0, print.formula=F, main = NA, vertex.border = 0,
vertex.cex = vc2, label.pos=3, suppress.axes=T, xlab=NA, ylab=NA)
#legend("topleft", col = 4:11, pch = 16, legend=c("review", "general", "theory/method", "appl./health", "computation","eco./envir.", "JSS", "StataJ"), cex=.8, box.col=0)
# Network Plot 2 (Fig. 12b) -------
# cloud/uncertainty plot
p = latent.srp2[["sample"]][["Z"]]
m = matrix(0,0,3)
for (i in 1:47) {
p1 = cbind(p[,i,][,1],p[,i,][,2], rep((cutree(journals.cluster, h = 0.6)+3)[i],10000))
m = rbind(m,p1)
}
#null plot:
plot.ergmm(latent.srp2, xlab = NA, ylab = NA, vertex.col=0, edge.col=0,
plot.vars=F, suppress.axes=T, print.formula=F, vertex.border=0,
xlim = c(-4,4), ylim = c(-3,5), main = NA, pie=F)
#add points
s = sample(1:470000)
ndraws = 47000
for (i in 1:ndraws) {
points(m[s[i],1], m[s[i],2], col = m[s[i],3], pch=".")
}
legend("topleft", col = 4:11, pch = 16, legend=c("review", "general", "theory/method", "appl./health", "computation","eco./envir.", "JSS", "StataJ"), cex=.8, box.col=0)
# Network Plot 1 Optimized for Grayscale (Fig. 12a) -------
vc2 = (latent.srp2$mkl$beta[48:94] - latent.srp2$mkl$beta[1:47])/2+1
group.bw = cutree(journals.cluster, h = 0.6)
group.bw[group.bw==1]=22
group.bw[group.bw==2]=1
group.bw[group.bw==3]=21
group.bw[group.bw==6]=12
group.bw[group.bw==4]=23
group.bw[group.bw==8]=10
plot(latent.srp2$mkl$Z[, 1],latent.srp2$mkl$Z[, 2],
pch = group.bw,
bg="grey",
#col = grey(cutree(journals.cluster, h = 0.6)/9),
#col = cutree(journals.cluster, h = 0.6)+3
lwd = .7, cex = vc2 +.5,
xaxt = 'n', yaxt = 'n', xlab = NA, ylab = NA, bty = 'n'
)
legend("topright", pch = c(22,1,21,23,5,12,7,10), pt.bg ="grey",
legend=c("review", "general", "theory/method", "appl./health", "computation","eco./envir.", "JSS", "StataJ"),
cex=.7, box.col=0)
adjust.x = rep(0,47);
adjust.x[c(8, 23)]=.1;
adjust.x[c(36)]=.2;
adjust.x[c(12, 30)] = -.12;
adjust.x[c(17)] = -.25;
adjust.y = rep(0,47);
adjust.y[c(32)] = .1;
adjust.y[c(31)] = -.3;
adjust.y[c(13, 30)]=-.045;
text(latent.srp2$mkl$Z[, 1]+adjust.x, latent.srp2$mkl$Z[, 2]+adjust.y,
Cnet%v%"vertex.names", cex=.6, pos = 3, offset = .5)
# Network Plot 2 Optimized for Grayscale (Fig. 12b) ####
n = 47
N = 1000
p = latent.srp2[["sample"]][["Z"]]
m = matrix(0,0,3)
for (i in 1:n) {
p1 = cbind(p[,i,][,1],p[,i,][,2], rep((cutree(journals.cluster, h = 0.6)+3)[i], N))
m = rbind(m,p1)
}
#null plot:
plot.ergmm(latent.srp2, xlab = NA, ylab = NA, vertex.col=0, edge.col=0,
plot.vars=F, suppress.axes=T, print.formula=F, vertex.border=0,
xlim = c(-4,4), ylim = c(-3,5), main = NA, pie=F)
#add points
s = sample(1:(n * N * 10))
for (i in 1:(n * N)) {
points(m[s[i],1], m[s[i],2], col = gray((m[s[i],3]-3)/9), pch=".", cex = 3)
}
legend("topleft", col = gray(1:8/9), pch = 16, legend=c("review", "general", "theory/method", "appl./health", "computational","eco./envir.", "JSS", "StataJ"), cex=.8, box.col=0)