peer_review_bias

https://github.com/sdey6/peer_review_bias
Science Score: 18.0%

This score indicates how likely this project is to be science-related based on various indicators:
✓
CITATION.cff file
Found CITATION.cff file
○
codemeta.json file
○
.zenodo.json file
○
DOI references
○
Academic publication links
○
Academic email domains
○
Institutional organization owner
○
JOSS paper metadata
○
Scientific vocabulary similarity
Low similarity (5.7%) to scientific vocabulary
Last synced: 10 months ago · JSON representation ·
Repository

Basic Info

Host: GitHub
Owner: sdey6
Language: Python
Default Branch: master
Size: 77.1 MB
Statistics

Stars: 0
Watchers: 1
Forks: 0
Open Issues: 0
Releases: 0
Created over 2 years ago · Last pushed over 2 years ago
Metadata Files

Readme Citation
README.md

Age Bias Detection in Peer Review

Brief description of your project.
Getting OpenReview data

Installation

```bash
pip install openreview-py
```
To use SerpAPI, the following command also needs to be run:
```bash
pip install google-search-results
```
Usage

This thesis code repository reproduces the data collection described in the methodology section, as well as the data preprocessing and database design.
The year-wise review and author data produced by these steps is pickled, together with the citation data, in the "data_pickled" folder.
The experiment results are in textanalysis.py and numericalanalysis.py files.
Owner

Login: sdey6
Kind: user
Repositories: 1
Profile: https://github.com/sdey6
Citation (citation_analysis.py)

# -*- coding: utf-8 -*-
"""Citation Analysis.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1QZ_ZD5lWcsKklVqxpRvvpe-kT0RN1e0o

This notebook has visualization regarding citation related experiments.
"""

import pandas as pd
import pickle
import ast
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import numpy as np

from google.colab import drive
drive.mount('/content/drive')

data_path = '/content/drive/MyDrive/thesis-repository/data_17_19/'


def _load_pickle(file_name):
    """Unpickle ``file_name`` located under ``data_path``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    loading finishes.

    NOTE: unpickling can execute arbitrary code; only load files that were
    produced by this project.
    """
    with open(f'{data_path}{file_name}', 'rb') as handle:
        return pickle.load(handle)


def _prepare_reviews(file_name, renames=None):
    """Load a review dataset and normalise the columns used for merging.

    Parameters
    ----------
    file_name : str
        Pickle file name relative to ``data_path``.
    renames : dict, optional
        Column renames applied before anything else (the yearly files do not
        all use the same column names).

    Returns
    -------
    DataFrame whose ``decision`` column is collapsed to 'Accept'/'Reject'
    (anything not starting with 'Accept' becomes 'Reject') and whose ``title``
    column is lower-cased.
    """
    frame = _load_pickle(file_name)
    if renames:
        frame = frame.rename(columns=renames)
    frame['decision'] = frame['decision'].apply(
        lambda text: 'Accept' if text.startswith('Accept') else 'Reject')
    frame['title'] = frame['title'].apply(lambda text: text.lower())
    return frame


# get citation data and merge with main dataset

df_17_all = _prepare_reviews('all_2017.pkl')
df_cit_17 = _load_pickle('all_cit_17.pkl')
cit_17 = df_17_all.merge(df_cit_17, on='title')

df_18_all = _prepare_reviews('data_18_all.pkl', renames={'Title': 'title'})
df_cit_18 = _load_pickle('all_cit_18.pkl')
cit_18 = df_18_all.merge(df_cit_18, on='title')

# The 2019 file stores the decision under 'recommendation'.
df_19_all = _prepare_reviews(
    'data_19_all.pkl', renames={'recommendation': 'decision', 'Title': 'title'})
df_cit_19 = _load_pickle('all_cit_19.pkl')
cit_19 = df_19_all.merge(df_cit_19, on='title')

print(cit_17.shape, cit_18.shape, cit_19.shape)

"""**Citation count Vs Rating**"""

def _citations_per_rating(merged):
    """Return ``(review_columns, per_rating)`` for one year's merged frame.

    ``review_columns`` is the four-column selection used for the rating
    analysis. ``per_rating`` has one row per (rating value, citation count)
    pair: ratings are exploded, grouped by rating value with the titles and
    citation counts collected into lists, and the citation lists are exploded
    again.
    """
    review_columns = merged[['title', 'decision', 'cited_by_count', 'rating_score']]
    one_row_per_rating = review_columns.explode('rating_score').reset_index(drop=True)
    collected = one_row_per_rating.groupby('rating_score')[['title', 'cited_by_count']].agg(list)
    per_rating = collected.reset_index().explode('cited_by_count')
    return review_columns, per_rating


cit_17_rev, cit_17_e = _citations_per_rating(cit_17)
cit_18_rev, cit_18_e = _citations_per_rating(cit_18)
cit_19_rev, cit_19_e = _citations_per_rating(cit_19)

# One bar chart per year; seaborn's default estimator gives the mean citation
# count for each rating value.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(13, 4))
_rating_panels = (
    (ax1, cit_17_e, 'SB-2017', 820),
    (ax2, cit_18_e, 'DB-2018', 800),
    (ax3, cit_19_e, 'DB-2019', 800),
)
for _axis, _per_rating, _heading, _y_max in _rating_panels:
    sns.barplot(data=_per_rating, x='rating_score', y='cited_by_count',
                palette='Purples', errorbar=None, ax=_axis)

# Only the left panel carries the y label and only the middle one the x label.
ax1.set_ylabel('Average Citation', fontdict={'fontsize': 12})
ax2.set_ylabel('')
ax3.set_ylabel('')
fig.suptitle('Citation count Vs Rating', y=1.1)
for _axis, _per_rating, _heading, _y_max in _rating_panels:
    _axis.set_title(_heading)
ax1.set_xlabel('')
ax2.set_xlabel('Rating', fontdict={'fontsize': 12})
ax3.set_xlabel('')
for _axis, _per_rating, _heading, _y_max in _rating_panels:
    _axis.set_ylim([0, _y_max])
plt.savefig("citation vs Rating.png", bbox_inches="tight")
plt.show()

"""**Citation Count VS Paper Decision**"""

def _rank_by_avg_rating(reviews):
    """Score each paper and rank the papers by that score.

    Parameters
    ----------
    reviews : DataFrame
        Needs ``title``, ``decision``, ``cited_by_count`` and a
        ``rating_score`` column holding a list of ratings per paper.

    Returns
    -------
    (scored, ranked)
        ``scored`` keeps title / decision / avg_rating / cited_by_count.
        ``ranked`` is ``scored`` sorted ascending by ``avg_rating`` with a
        1-based ``ID`` column in front.

    The new columns are added with ``assign`` (which works on a copy) rather
    than by assigning into ``reviews``, because ``reviews`` is itself a column
    selection of another frame and in-place assignment triggers pandas'
    SettingWithCopyWarning.
    """
    ratings = reviews['rating_score'].apply(lambda scores: [int(s) for s in scores])
    # NOTE(review): weights=scores weights every rating by itself, i.e. this is
    # sum(r**2) / sum(r), not the arithmetic mean. Kept as-is so the figure is
    # unchanged - confirm whether a plain mean was intended.
    avg_rating = ratings.apply(lambda scores: np.average(scores, weights=scores))
    scored = reviews.assign(rating_score=ratings, avg_rating=avg_rating)
    scored = scored[['title', 'decision', 'avg_rating', 'cited_by_count']]

    ranked = scored.sort_values(by='avg_rating', ascending=True).reset_index(drop=True).reset_index()
    ranked = ranked.rename(columns={'index': 'ID'})
    ranked['ID'] = ranked['ID'] + 1  # positions start at 1 on the x axis
    return scored, ranked


cit_17_rev, cit_17_sorted = _rank_by_avg_rating(cit_17_rev)
cit_18_rev, cit_18_sorted = _rank_by_avg_rating(cit_18_rev)
cit_19_rev, cit_19_sorted = _rank_by_avg_rating(cit_19_rev)

# Notebook inspection cells; they have no effect when run as a script.
cit_19_sorted.head()

cit_18_sorted[(cit_18_sorted.cited_by_count==0 )&( cit_18_sorted.decision=='Accept' )]

# Scatter of citation count against the paper's rank position, coloured by the
# accept/reject decision, one panel per year.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(14, 4))
order = ['Accept', 'Reject']
palette = ["#ffcc00", "#993300"]
_decision_panels = (
    (ax1, cit_17_sorted, 'SB-2017'),
    (ax2, cit_18_sorted, 'DB-2018'),
    (ax3, cit_19_sorted, 'DB-2019'),
)
for _axis, _ranked, _heading in _decision_panels:
    sns.scatterplot(data=_ranked, x="ID", y="cited_by_count", hue='decision',
                    ax=_axis, alpha=.7, palette=palette, hue_order=order)
    if _axis is not ax2:
        # The middle panel's legend is the only one kept.
        _axis.legend_.remove()

ax1.set_ylabel('Citation count', fontdict={'fontsize': 12})
ax2.set_ylabel('')
ax3.set_ylabel('')
fig.suptitle('Citation Count VS Paper Decision', y=1.1)
for _axis, _ranked, _heading in _decision_panels:
    _axis.set_title(_heading)
    _axis.set_ylim([0, 10000])
ax1.set_xlabel('')
ax3.set_xlabel('')

# Re-create the kept legend below the middle panel; the leading newlines in the
# x label push the label text further down.
ax2.legend(loc="upper center", bbox_to_anchor=(0.5, -.08), ncol=2, fontsize=12)
ax2.set_xlabel('\n\n\npapers', fontdict={'fontsize': 12})
plt.savefig("citation_vs_decision.png", bbox_inches="tight")
plt.show()

# String markers that appear in ``academic_age`` instead of an age. The same
# list is applied to every year: a marker left in the column would later be
# compared against integers when the ages are categorised and raise TypeError.
_AGE_PLACEHOLDERS = ['skipped', 'no_match', 'no match', 'only informal']


def _authors_with_known_age(merged):
    """Return one row per (paper, author) for authors whose age is known.

    ``academic_age`` holds one entry per author, so it is exploded into
    separate rows; rows whose entry is one of ``_AGE_PLACEHOLDERS`` are
    dropped and the index is renumbered.
    """
    per_author = merged[['title', 'decision', 'cited_by_count', 'academic_age']]
    per_author = per_author.explode('academic_age')
    is_placeholder = per_author['academic_age'].isin(_AGE_PLACEHOLDERS)
    return per_author[~is_placeholder].reset_index(drop=True)


cit_17_auth = _authors_with_known_age(cit_17)
cit_18_auth = _authors_with_known_age(cit_18)
cit_19_auth = _authors_with_known_age(cit_19)

def get_age_labels(age):
    """Map an academic age to 'Junior', 'Intermediate' or 'Senior'.

    Ages up to and including 3 are 'Junior', ages above 3 up to and including
    10 are 'Intermediate', and ages above 10 are 'Senior'. A value that
    satisfies none of the comparisons (e.g. NaN) yields ``None``.
    """
    if age <= 3:
        return 'Junior'
    if 3 < age <= 10:
        return 'Intermediate'
    if age > 10:
        return 'Senior'
    return None

get_age_labels(15)  # notebook sanity check; no effect when run as a script

# Citation count from which a paper is treated as highly cited in this section.
_HIGH_CITATION_MIN = 100


def _highly_cited(authors):
    """Return the author rows whose paper has at least ``_HIGH_CITATION_MIN`` citations."""
    return authors[authors.cited_by_count >= _HIGH_CITATION_MIN]


def _author_category_share(authors):
    """Return the percentage of highly cited author rows per ``author_category``.

    The denominator is the number of highly cited rows, so each percentage is
    that category's share of those rows. The result has the columns
    ``author_category`` and ``percentage_count``.
    """
    cited = _highly_cited(authors)
    share = cited.groupby(['author_category']).size() / len(cited) * 100
    return share.reset_index().rename({0: 'percentage_count'}, axis=1)


# Label every author row with its seniority category.
for _authors in (cit_17_auth, cit_18_auth, cit_19_auth):
    _authors['author_category'] = _authors['academic_age'].apply(get_age_labels)

# Number of author rows on highly cited papers, per year. The 2019 value used
# to be a bare expression, which prints nothing outside a notebook.
for _authors in (cit_17_auth, cit_18_auth, cit_19_auth):
    print(_highly_cited(_authors).shape)

# Number of highly cited papers, per year. The columns are selected with a
# list: selecting several groupby columns with a bare tuple is deprecated and
# raises in pandas >= 2.0.
for _authors in (cit_17_auth, cit_18_auth, cit_19_auth):
    _papers = (_highly_cited(_authors)
               .groupby(['title', 'decision'])[['academic_age', 'author_category']]
               .agg(list)
               .reset_index())
    print(_papers.shape)

cit_17_auth.head()

ci_17_100 = _author_category_share(cit_17_auth)
ci_18_100 = _author_category_share(cit_18_auth)
ci_19_100 = _author_category_share(cit_19_auth)

"""**Average citation count with Juniors as First Author**"""

# get the combinations of junior and rest of the categories

def get_comb(cat_list):
    """Describe who a junior first author collaborated with.

    Parameters
    ----------
    cat_list : list
        Author categories of one paper in author order; the first entry is
        the first author.

    Returns
    -------
    str
        * 'junior and junior' - every author is 'Junior'
        * 'junior and senior' - a 'Senior' is present, no 'Intermediate'
        * 'junior and intermediate' - an 'Intermediate' is present, no 'Senior'
        * 'junior,intermediate,senior' - both are present
        * 'NA' - anything else: an empty list, a first author who is not
          'Junior', a single-author paper, or co-authors whose categories are
          all unknown.

    Earlier versions fell off the end of the function (returning ``None``) for
    the last 'NA' case and raised ``IndexError`` on an empty list; both now
    return 'NA' like every other non-applicable paper.
    """
    if len(cat_list) <= 1 or cat_list[0] != 'Junior':
        return 'NA'

    # The first author is 'Junior', so a single distinct value means all juniors.
    if len(set(cat_list)) == 1:
        return 'junior and junior'

    has_senior = 'Senior' in cat_list
    has_intermediate = 'Intermediate' in cat_list
    if has_senior and has_intermediate:
        return 'junior,intermediate,senior'
    if has_senior:
        return 'junior and senior'
    if has_intermediate:
        return 'junior and intermediate'
    return 'NA'

# Minimum citation count for a paper to enter the combination percentages.
# NOTE(review): the result variables are named ``*_100_*`` and the figure
# titles mention 100 citations, yet the cut-off applied here is 50 - confirm
# which value is intended.
_COMBINATION_CITATION_MIN = 50


def _junior_led_papers(authors):
    """Return one row per paper whose author mix has a usable ``Combination``.

    Author rows are collapsed to one row per paper with the ages and
    categories gathered into lists, ``get_comb`` labels each paper, and papers
    labelled 'NA' or left without a label are removed. The missing-label
    filter used to be applied to 2017 only; leaving unlabelled papers in the
    other years inflates the denominator of the percentages computed below.
    """
    papers = (authors
              .groupby(['title', 'decision', 'cited_by_count'])[['academic_age', 'author_category']]
              .agg(list)
              .reset_index())
    papers = papers[['title', 'decision', 'academic_age', 'cited_by_count', 'author_category']]
    papers = papers.assign(Combination=papers.author_category.apply(get_comb))
    usable = papers.Combination.notnull() & (papers.Combination != 'NA')
    return papers[usable].reset_index(drop=True)


def _combination_share(papers):
    """Return each ``Combination``'s percentage among sufficiently cited papers."""
    cited = papers[papers.cited_by_count >= _COMBINATION_CITATION_MIN]
    share = cited.groupby(['Combination']).size() / len(cited) * 100
    return share.reset_index().rename({0: 'percentage_count'}, axis=1)


cit_17_auth_a = _junior_led_papers(cit_17_auth)
cit_18_auth_a = _junior_led_papers(cit_18_auth)
cit_19_auth_a = _junior_led_papers(cit_19_auth)

# Mean citation count per author combination.
comb_17 = cit_17_auth_a.groupby('Combination')['cited_by_count'].mean().reset_index()
comb_18 = cit_18_auth_a.groupby('Combination')['cited_by_count'].mean().reset_index()
comb_19 = cit_19_auth_a.groupby('Combination')['cited_by_count'].mean().reset_index()

ci_17_100_comb = _combination_share(cit_17_auth_a)
ci_18_100_comb = _combination_share(cit_18_auth_a)
ci_19_100_comb = _combination_share(cit_19_auth_a)

def check_fa_all(auth_cat_list):
    """Return the first-author label derived from the first category in the list.

    'Junior' maps to 'FA-Junior', 'Intermediate' to 'FA-Intermediate' and any
    other value to 'FA-Senior'. Only the first element is inspected; an empty
    input yields ``None``.
    """
    nothing = object()
    first = next(iter(auth_cat_list), nothing)
    if first is nothing:
        return None
    if first == 'Junior':
        return 'FA-Junior'
    return 'FA-Intermediate' if first == 'Intermediate' else 'FA-Senior'

def _first_author_papers(authors):
    """Return one row per paper carrying the first author's ``fa_category``.

    Author rows are grouped per paper with ages and categories collected into
    lists; ``check_fa_all`` turns the category list into the label.
    """
    grouping = authors.groupby(['title', 'decision', 'cited_by_count'])
    papers = grouping[['academic_age', 'author_category']].agg(list).reset_index()
    papers['fa_category'] = papers.author_category.apply(check_fa_all)
    return papers[['title', 'decision', 'cited_by_count', 'fa_category']]


cit_17_auth_b = _first_author_papers(cit_17_auth)
cit_18_auth_b = _first_author_papers(cit_18_auth)
cit_19_auth_b = _first_author_papers(cit_19_auth)

import matplotlib.patheffects as path_effects

def add_median_labels(ax, fmt='.1f'):
    """Write each box's median value on top of its median line.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes on which a box plot has already been drawn.
    fmt : str
        Format spec applied to the median value (default one decimal place).

    Axes without any ``PathPatch`` children, or with fewer line artists than
    boxes, are left untouched; previously these cases raised
    ``ZeroDivisionError`` and ``ValueError`` (zero slice step) respectively.
    """
    lines = ax.get_lines()
    boxes = [c for c in ax.get_children() if type(c).__name__ == 'PathPatch']
    if not boxes:
        return
    lines_per_box = len(lines) // len(boxes)
    if lines_per_box == 0:
        return
    # Assumes the median is the fifth line artist of every box (index 4) -
    # TODO confirm for the seaborn/matplotlib version in use.
    for median in lines[4:len(lines):lines_per_box]:
        x, y = (data.mean() for data in median.get_data())
        # choose value depending on horizontal or vertical plot orientation
        value = x if (median.get_xdata()[1] - median.get_xdata()[0]) == 0 else y
        text = ax.text(x, y, f'{value:{fmt}}', ha='center', va='center',
                       fontweight='bold', color='white')
        # create median-colored border around white text for contrast
        text.set_path_effects([
            path_effects.Stroke(linewidth=3, foreground=median.get_color()),
            path_effects.Normal(),
        ])

"""**Average citation count for one author**"""

def _single_author_papers(authors):
    """Return the papers that have exactly one author row left in ``authors``.

    Author rows are grouped per paper; papers whose collected age list has
    length 1 are kept and their one-element category list is unwrapped into a
    plain value.
    """
    grouping = authors.groupby(['title', 'decision', 'cited_by_count'])
    papers = grouping[['academic_age', 'author_category']].agg(list).reset_index()
    papers['auth_len'] = papers.academic_age.apply(len)
    solo = papers[papers.auth_len == 1].reset_index(drop=True)
    solo.author_category = solo.author_category.apply(lambda categories: categories[0])
    return solo[['title', 'cited_by_count', 'author_category']]


cit_17_auth_one = _single_author_papers(cit_17_auth)
cit_18_auth_one = _single_author_papers(cit_18_auth)
cit_19_auth_one = _single_author_papers(cit_19_auth)

# 2x3 grid of mean citation counts: the top row is grouped by the first
# author's category, the bottom row by the category of a paper's only author;
# one column per year.
hue_order = ['Senior', 'Intermediate', 'Junior']
order_fa = ['FA-Senior', 'FA-Intermediate', 'FA-Junior']
fig, ax = plt.subplots(2, 3, figsize=(12, 7), sharey=True)
order = ['junior,intermediate,senior', 'junior and senior', 'junior and intermediate',
         'junior and junior']

_first_author_frames = (cit_17_auth_b, cit_18_auth_b, cit_19_auth_b)
_single_author_frames = (cit_17_auth_one, cit_18_auth_one, cit_19_auth_one)
for _col, (_fa_frame, _solo_frame) in enumerate(zip(_first_author_frames, _single_author_frames)):
    sns.barplot(data=_fa_frame, x='fa_category', y='cited_by_count',
                order=order_fa, ax=ax[0, _col], errorbar=None)
    sns.barplot(data=_solo_frame, x='author_category', y='cited_by_count',
                order=hue_order, ax=ax[1, _col], errorbar=None)

# Called before the labels are changed, as in the original layout.
fig.tight_layout()

# Only the left column keeps a y label and only the middle column an x label.
for _row, _group_label in enumerate(('Multiple Authors', 'Single Author')):
    ax[_row, 0].set_ylabel('\n Average Citation', fontdict={'fontsize': 12})
    ax[_row, 0].set_xlabel('')
    ax[_row, 1].set_ylabel('')
    ax[_row, 1].set_xlabel(_group_label, fontdict={'fontsize': 12})
    ax[_row, 2].set_ylabel('')
    ax[_row, 2].set_xlabel('')

# Print the rounded mean on top of every bar.
for _axis in ax.flat:
    for container in _axis.containers:
        _axis.bar_label(container, fmt='%.0f', weight='bold')

# Title typo fixed ('actegory' -> 'Category').
fig.suptitle('Author Category Statistics', y=1.05)
for _axis, _heading in zip(ax[0], ('SB-2017', 'DB-2018', 'DB-2019')):
    _axis.set_title(_heading)

plt.savefig("avg_citation_single_multi.png", bbox_inches='tight')
fig.show()

# Horizontal bars: percentage of well-cited junior-led papers per author
# combination, one panel per year.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 5.5))
order = ['junior,intermediate,senior', 'junior and senior', 'junior and intermediate', 'junior and junior']
_combination_panels = (
    (ax1, ci_17_100_comb, 'SB-2017'),
    (ax2, ci_18_100_comb, 'DB-2018'),
    (ax3, ci_19_100_comb, 'DB-2019'),
)
for _axis, _shares, _heading in _combination_panels:
    sns.barplot(data=_shares, x='percentage_count', y='Combination', ax=_axis,
                palette='rocket', order=order)
    if _axis is not ax1:
        # Category names are shown on the left panel only.
        _axis.set(yticklabels=[])

for _axis, _shares, _heading in _combination_panels:
    for container in _axis.containers:
        _axis.bar_label(container, fmt='%.1f', weight='bold', fontsize=12)
    _axis.set_xlim([0, 100])

ax1.set_yticklabels(ax1.get_yticklabels(), fontsize=12)
ax2.set_ylabel('')
ax3.set_ylabel('')
ax1.set_xlabel('')
ax3.set_xlabel('')
ax2.set_xlabel('Percentage count', fontdict={'fontsize': 13})
ax1.set_ylabel('Collaboration', fontdict={'fontsize': 13})
fig.suptitle('Participation of Authors in Papers with >100 Citation', y=1,)
for _axis, _shares, _heading in _combination_panels:
    _axis.set_title(_heading)
fig.tight_layout()
# A later figure in this script is saved as
# "contribution of authors_citation.png"; this one gets its own file name so
# that it is not overwritten by that later savefig call.
plt.savefig("contribution of authors_citation_combinations.png", bbox_inches="tight")
fig.show()

"""**Number of authors contributing to papers with more than 100 citation**"""

def _solo_category_share(solo_papers):
    """Return each author category's percentage among single-author papers
    with at least 50 citations.

    NOTE(review): the result variables are named ``*_100_*`` although the
    cut-off used is 50 - confirm which value is intended.
    """
    cited = solo_papers[solo_papers.cited_by_count >= 50]
    share = cited.groupby(['author_category']).size() / len(cited) * 100
    return share.reset_index().rename({0: 'percentage_count'}, axis=1)


ci_17_100_one = _solo_category_share(cit_17_auth_one)
ci_18_100_one = _solo_category_share(cit_18_auth_one)
ci_19_100_one = _solo_category_share(cit_19_auth_one)

# plot papers decision ratio

# 3x3 grid of percentage bars, one column per year:
#   row 0 - category of the only author of single-author papers
#   row 1 - category shares from the ci_*_100 frames
#   row 2 - author combinations of junior-led papers (horizontal bars)
hue_order = ['Senior', 'Intermediate', 'Junior']
order_fa = ['FA-Senior', 'FA-Intermediate', 'FA-Junior']
order = ['junior,intermediate,senior', 'junior and senior', 'junior and intermediate',
         'junior and junior']

fig, ax = plt.subplots(3, 3, figsize=(18, 13))

_grid_rows = (
    ((ci_17_100_one, ci_18_100_one, ci_19_100_one),
     dict(x='author_category', y='percentage_count', order=hue_order)),
    ((ci_17_100, ci_18_100, ci_19_100),
     dict(x='author_category', y='percentage_count', order=hue_order)),
    ((ci_17_100_comb, ci_18_100_comb, ci_19_100_comb),
     dict(x='percentage_count', y='Combination', order=order)),
)
for _row, (_frames, _plot_kwargs) in enumerate(_grid_rows):
    for _col, _frame in enumerate(_frames):
        a = sns.barplot(data=_frame, ax=ax[_row, _col], **_plot_kwargs)
        if _col > 0:
            # y tick labels are shown in the left column only
            a.set(yticklabels=[])

fig.tight_layout()

# No y labels anywhere; the middle column's x label names the row.
_row_captions = (' One Author', ' First Author', '\nAuthor Combination')
for _row, _caption in enumerate(_row_captions):
    for _col in range(3):
        ax[_row, _col].set_ylabel('')
    ax[_row, 0].set_xlabel('')
    ax[_row, 1].set_xlabel(_caption, fontdict={'fontsize': 15})
    if _row < 2:
        ax[_row, 2].set_xlabel('', fontsize=12)
    else:
        ax[_row, 2].set_xlabel('')

# Rounded percentage on every bar.
for _axis in ax.flat:
    for container in _axis.containers:
        _axis.bar_label(container, fmt='%.0f', weight='bold', fontsize=12)

fig.suptitle('Percentage of Authors for papers with Citation > 100', y=1.05, fontsize=14)
for _axis, _heading in zip(ax[0], ('SB-2017', 'DB-2018', 'DB-2019')):
    _axis.set_title(_heading)

# Enlarge the left column's tick labels; this stays ahead of the limit changes
# below, matching the original call order.
for _row in range(3):
    ax[_row, 0].set_yticklabels(ax[_row, 0].get_yticklabels(), fontsize=16)

# Percent axis fixed to 0-100: y for the vertical bars, x for the horizontal ones.
for _row in range(2):
    for _col in range(3):
        ax[_row, _col].set_ylim([0, 100])
for _col in range(3):
    ax[2, _col].set_xlim([0, 100])

plt.savefig("contribution of authors_citation.png", bbox_inches='tight')
fig.show()
GitHub Events

Total

Last Year

Dependencies

openreview-py/setup.py pypi
Deprecated *
future *
pycryptodome *
pyjwt *
pylatexenc *
requests >=2.18.4
setuptools ==65.5.1
tld >=0.12
tqdm *
ecosyste.ms

Data

Tools

Indexes

Applications

Experiments

Open Source Science