citationexchange

Code to accompany discussion of "Statistical Modelling of Citation Exchange Between Statistics Journals" by Cristiano Varin, Manuela Cattelan and David Firth

https://github.com/jcarlen/citationexchange

Science Score: 18.0%

This score indicates how likely this project is to be science-related based on various indicators:

✓
CITATION.cff file
Found CITATION.cff file
○
codemeta.json file
○
.zenodo.json file
○
DOI references
○
Academic publication links
○
Academic email domains
○
Institutional organization owner
○
JOSS paper metadata
○
Scientific vocabulary similarity
Low similarity (1.0%) to scientific vocabulary

Last synced: 10 months ago · JSON representation ·

Repository

Code to accompany discussion of "Statistical Modelling of Citation Exchange Between Statistics Journals" by Cristiano Varin, Manuela Cattelan and David Firth

Basic Info

Host: GitHub
Owner: jcarlen
Language: TeX
Default Branch: master
Size: 1.6 MB

Statistics

Stars: 0
Watchers: 2
Forks: 0
Open Issues: 0
Releases: 0

Created almost 11 years ago · Last pushed over 10 years ago

Metadata Files

Readme Citation

README.md

CitationExchange

Code to accompany discussion of "Statistical Modelling of Citation Exchange Between Statistics Journals" by Cristiano Varin, Manuela Cattelan and David Firth

Owner

Name: jcarlen
Login: jcarlen
Kind: user

Website: https://jcarlen.github.io/
Repositories: 18
Profile: https://github.com/jcarlen

Senior Data Scientist, Los Angeles Times

Citation (CitationExchange_Code.R)

# Code from Carlen and Handcock's "Discussion on 'Statistical Modelling of Citation Exchange Between Statistics
# Journals’ by Cristiano Varin, Manuela Cattelan and David Firth"

------------------------------------------------------------------------------
  
# Pre-Processing -------

library(latentnet)
library(ergm.count)
library(stats)

download.file("http://cristianovarin.weebly.com/uploads/1/5/1/5/15156956/varin_cattelan_firth_supplement.zip", "varin_cattelan_firth_supplement.zip")
unzip("varin_cattelan_firth_supplement.zip")
Cmatrix <- as.matrix(read.csv("Varin_Cattelan_Firth_supplement/Data/cross-citation-matrix.csv", row.names = 1))

#as valued net (#see Modeling valued networks with statnet paper)
#removes self-citations; Stigler method also excluded self-citations
Cnet = as.network(t(Cmatrix), directed=T, matrix.type="a", ignore.eval=F,  names.eval="citations")

# Pre-Processing 2: Code from from Varin_Cattelan_Firth_supplement.R published with article -------

## From Section 1 & 2

journal.abbr <- rownames(Cmatrix)
Tmatrix <- Cmatrix + t(Cmatrix)
diag(Tmatrix) <- diag(Cmatrix)
journals.cluster <- hclust(d = as.dist(1 - cor(Tmatrix)))

## From Section 3: Quasi-Stigler Model
library(BradleyTerry2)

Cdata <- countsToBinomial(Cmatrix)
fit <- BTm(outcome = cbind(win1, win2), player1 = player1, player2 = player2, data = Cdata)

npairs <- NROW(Cdata)
njournals <- nlevels(Cdata$player1)
phi <- sum(residuals(fit, "pearson")^2) / (npairs - (njournals - 1))

### "3.1 Journal residuals"
journal.res <- rep(NA, njournals)
res <- residuals(fit, type = "pearson")
coefs <- c(0, coef(fit)) # 0 is the coefficient of the first journal
for(i in 1:njournals){
  A <- which(Cdata$player1 == journal.abbr[i])
  B <- which(Cdata$player2 == journal.abbr[i])
  y <- c(res[A], -res[B])
  x <- c(-coefs[Cdata$player2[A]], -coefs[Cdata$player1[B]])
  journal.res[i] <- sum(y * x) / sqrt(phi * sum(x ^ 2))
}
names(journal.res) <- journal.abbr

library(qvcalc)
cov.matrix <- matrix(0, nrow = njournals, ncol = njournals)
cov.matrix[-1, -1] <- vcov(fit)
qse <- qvcalc(phi * cov.matrix , estimates = c(0, coef(fit)),
              labels = journal.abbr)

export.scores <- qse$qvframe$estimate
export.scores <- export.scores - mean(export.scores)
names(export.scores) <- journal.abbr

sort.id <- sort(export.scores, decreasing = TRUE, index.return = TRUE)$ix
fit.table <- data.frame(quasi = export.scores[sort.id], qse = qse$qvframe$quasiSE[sort.id])

## Some post-processing/alphabetizing (not from author supplement)

rownames(fit.table)
match(rownames(fit.table),Cnet%v%"vertex.names")
fit.table2 = fit.table[order(match(rownames(fit.table),Cnet%v%"vertex.names")),]

# Network model 1: Sender-receiver model with citation counts ~ Poisson -------

latent.srp1 = ergmm(Cnet~ sender(base = 0) + receiver(base = 0) - 1, response = "citations",
                    family = "Poisson.log", control = control.ergmm(burnin = 40000), seed = 123)

# Correlation of model with Stigler (0.95)

cor(latent.srp1$mkl$beta[48:94] - latent.srp1$mkl$beta[1:47], fit.table2$quasi)

# Network model 2: Two-dimensional latent-space model  -------
# (this model takes a while to run > 1hr)

latent.srp2 = ergmm(Cnet~euclidean(d = 2) + sender(base = 0) + receiver(base = 0) - 1, response = "citations", 
                    family = "Poisson.log", seed = 123, 
                    control = ergmm.control(interval = 200, sample.size = 10000, burnin = 100000)) 

# Correlation of output with Stigler (0.99)

cor(latent.srp2$mkl$beta[48:94] - latent.srp2$mkl$beta[1:47], fit.table2$quasi)

# Network Plot 1 (Fig. 12a) -------
# slightly different/rotated than comment version. Changed some control settings. 

vc2 = (latent.srp2$mkl$beta[48:94] - latent.srp2$mkl$beta[1:47])/2+1

plot(latent.srp2, labels = T, cex=.7, label.cex=.5,
     plot.vars=F, vertex.col = cutree(journals.cluster, h = 0.6)+3, 
     edge.col=0, print.formula=F, main = NA, vertex.border = 0,
     vertex.cex = vc2, label.pos=3, suppress.axes=T,  xlab=NA, ylab=NA)

#legend("topleft", col = 4:11, pch = 16, legend=c("review", "general", "theory/method", "appl./health", "computation","eco./envir.", "JSS", "StataJ"), cex=.8, box.col=0)

# Network Plot 2 (Fig. 12b) -------

# cloud/uncertainty plot

p = latent.srp2[["sample"]][["Z"]]
m = matrix(0,0,3)
for (i in 1:47) {
  p1 = cbind(p[,i,][,1],p[,i,][,2], rep((cutree(journals.cluster, h = 0.6)+3)[i],10000))
  m = rbind(m,p1)
}

#null plot:
plot.ergmm(latent.srp2, xlab = NA, ylab = NA, vertex.col=0, edge.col=0, 
           plot.vars=F, suppress.axes=T, print.formula=F, vertex.border=0,
           xlim = c(-4,4), ylim = c(-3,5), main = NA, pie=F)

#add points
s = sample(1:470000)
ndraws = 47000
for (i in 1:ndraws) {
  points(m[s[i],1], m[s[i],2], col = m[s[i],3], pch=".")
}

legend("topleft", col = 4:11, pch = 16, legend=c("review", "general", "theory/method", "appl./health", "computation","eco./envir.", "JSS", "StataJ"), cex=.8, box.col=0)

# Network Plot 1 Optimized for Grayscale (Fig. 12a) -------

vc2 = (latent.srp2$mkl$beta[48:94] - latent.srp2$mkl$beta[1:47])/2+1

group.bw = cutree(journals.cluster, h = 0.6)
group.bw[group.bw==1]=22
group.bw[group.bw==2]=1
group.bw[group.bw==3]=21
group.bw[group.bw==6]=12
group.bw[group.bw==4]=23
group.bw[group.bw==8]=10

plot(latent.srp2$mkl$Z[, 1],latent.srp2$mkl$Z[, 2], 
     pch = group.bw,
     bg="grey",
     #col = grey(cutree(journals.cluster, h = 0.6)/9),  
     #col = cutree(journals.cluster, h = 0.6)+3
     lwd = .7, cex = vc2 +.5,
     xaxt = 'n', yaxt = 'n', xlab = NA, ylab = NA, bty = 'n'
)

legend("topright", pch = c(22,1,21,23,5,12,7,10), pt.bg ="grey",
       legend=c("review", "general", "theory/method", "appl./health", "computation","eco./envir.", "JSS", "StataJ"),
       cex=.7, box.col=0)

adjust.x = rep(0,47); 
adjust.x[c(8, 23)]=.1;
adjust.x[c(36)]=.2; 
adjust.x[c(12, 30)] = -.12;
adjust.x[c(17)] = -.25; 

adjust.y = rep(0,47); 
adjust.y[c(32)] = .1;
adjust.y[c(31)] = -.3;
adjust.y[c(13, 30)]=-.045; 

text(latent.srp2$mkl$Z[, 1]+adjust.x, latent.srp2$mkl$Z[, 2]+adjust.y, 
     Cnet%v%"vertex.names", cex=.6, pos = 3, offset = .5)

# Network Plot 2 Optimized for Grayscale (Fig. 12b) ####

n = 47
N = 1000 
p = latent.srp2[["sample"]][["Z"]]
m = matrix(0,0,3)
for (i in 1:n) {
  p1 = cbind(p[,i,][,1],p[,i,][,2], rep((cutree(journals.cluster, h = 0.6)+3)[i], N))
  m = rbind(m,p1)
}

#null plot:
plot.ergmm(latent.srp2, xlab = NA, ylab = NA, vertex.col=0, edge.col=0, 
           plot.vars=F, suppress.axes=T, print.formula=F, vertex.border=0,
           xlim = c(-4,4), ylim = c(-3,5), main = NA, pie=F)

#add points
s = sample(1:(n * N * 10))
for (i in 1:(n * N)) {
  points(m[s[i],1], m[s[i],2], col = gray((m[s[i],3]-3)/9), pch=".", cex = 3)
}

legend("topleft", col = gray(1:8/9), pch = 16, legend=c("review", "general", "theory/method", "appl./health", "computational","eco./envir.", "JSS", "StataJ"), cex=.8, box.col=0)

ecosyste.ms

Data

Tools

Indexes

Applications

Experiments

Open Source Science

citationexchange

Science Score: 18.0%

Repository

Basic Info

Statistics

Metadata Files

README.md

CitationExchange

Owner

Citation (CitationExchange_Code.R)

GitHub Events

Total

Last Year