peer_review_bias
Science Score: 18.0%
This score indicates how likely this project is to be science-related based on various indicators:
-
✓CITATION.cff file
Found CITATION.cff file -
○codemeta.json file
-
○.zenodo.json file
-
○DOI references
-
○Academic publication links
-
○Academic email domains
-
○Institutional organization owner
-
○JOSS paper metadata
-
○Scientific vocabulary similarity
Low similarity (5.7%) to scientific vocabulary
Last synced: 10 months ago
·
JSON representation
·
Repository
Basic Info
- Host: GitHub
- Owner: sdey6
- Language: Python
- Default Branch: master
- Size: 77.1 MB
Statistics
- Stars: 0
- Watchers: 1
- Forks: 0
- Open Issues: 0
- Releases: 0
Created over 2 years ago
· Last pushed over 2 years ago
Metadata Files
Readme
Citation
README.md
Age Bias Detection in Peer Review
Brief description of your project.
Getting OpenReview data
Installation
bash
pip install openreview-py
To use SerpAPI, run the following command:
bash
pip install google-search-results
Usage
This thesis code repository reproduces the data collection described in the methodology section, as well as the data preprocessing and database design.
The year-wise review and author data produced by these steps are pickled, together with the citation data, in the "data_pickled" folder.
The experiment results are in textanalysis.py and numericalanalysis.py files.
Owner
- Login: sdey6
- Kind: user
- Repositories: 1
- Profile: https://github.com/sdey6
Citation (citation_analysis.py)
# -*- coding: utf-8 -*-
"""Citation Analysis.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1QZ_ZD5lWcsKklVqxpRvvpe-kT0RN1e0o
This notebook has visualization regarding citation related experiments.
"""
import pandas as pd
import pickle
import ast
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import numpy as np
from google.colab import drive
drive.mount('/content/drive')
data_path = '/content/drive/MyDrive/thesis-repository/data_17_19/'


def _load_pickle(file_name):
    """Unpickle ``file_name`` from ``data_path``, closing the file afterwards.

    NOTE(review): ``pickle.load`` can execute arbitrary code, so it must only
    be pointed at pickles produced by this project.
    """
    with open(f'{data_path}{file_name}', 'rb') as handle:
        return pickle.load(handle)


def _normalise_reviews(reviews):
    """Collapse decisions to Accept/Reject and lower-case titles, in place.

    Any decision starting with 'Accept' becomes 'Accept'; everything else
    becomes 'Reject'. Titles are lower-cased because ``title`` is the merge
    key used against the citation data below.
    """
    reviews['decision'] = reviews['decision'].apply(
        lambda text: 'Accept' if text.startswith('Accept') else 'Reject')
    reviews['title'] = reviews['title'].apply(lambda text: text.lower())
    return reviews


# get citation data and merge with main dataset
# 2017: columns are already called 'decision' and 'title'.
df_17_all = _normalise_reviews(_load_pickle('all_2017.pkl'))
df_cit_17 = _load_pickle('all_cit_17.pkl')
cit_17 = df_17_all.merge(df_cit_17, on='title')

# 2018: the title column is stored as 'Title'.
df_18_all = _load_pickle('data_18_all.pkl').rename(columns={'Title': 'title'})
df_18_all = _normalise_reviews(df_18_all)
df_cit_18 = _load_pickle('all_cit_18.pkl')
cit_18 = df_18_all.merge(df_cit_18, on='title')

# 2019: the decision is stored as 'recommendation' and the title as 'Title'.
df_19_all = _load_pickle('data_19_all.pkl').rename(
    columns={'recommendation': 'decision', 'Title': 'title'})
df_19_all = _normalise_reviews(df_19_all)
df_cit_19 = _load_pickle('all_cit_19.pkl')
cit_19 = df_19_all.merge(df_cit_19, on='title')

# Default (inner) merge: only papers present in both frames are kept.
print(cit_17.shape, cit_18.shape, cit_19.shape)
"""**Citation count Vs Rating**"""
def _citations_per_rating(merged):
    """Build the per-paper and per-rating citation views for one year.

    Returns ``(per_paper, per_rating)``:

    * ``per_paper`` is an independent copy of the title, decision, citation
      count and rating columns. It is copied so that columns can be assigned
      to it later without writing to a slice of ``merged``.
    * ``per_rating`` has one row per (rating value, citation count) pair: the
      ratings are exploded, grouped by rating value, and the collected
      citation counts are exploded again.
    """
    per_paper = merged[['title', 'decision', 'cited_by_count', 'rating_score']].copy()
    per_review = per_paper.explode('rating_score').reset_index(drop=True)
    grouped = (per_review.groupby('rating_score')[['title', 'cited_by_count']]
               .agg(list)
               .reset_index())
    return per_paper, grouped.explode('cited_by_count')


cit_17_rev, cit_17_e = _citations_per_rating(cit_17)
cit_18_rev, cit_18_e = _citations_per_rating(cit_18)
cit_19_rev, cit_19_e = _citations_per_rating(cit_19)

fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(13, 4))
# One panel per year; each bar is the mean citation count for a rating value.
for _axis, _frame in zip((ax1, ax2, ax3), (cit_17_e, cit_18_e, cit_19_e)):
    sns.barplot(data=_frame, x='rating_score', y='cited_by_count',
                palette='Purples', errorbar=None, ax=_axis)

# Only the left panel carries the y label and only the centre one the x label.
ax1.set_ylabel('Average Citation', fontdict={'fontsize': 12})
ax2.set_ylabel('')
ax3.set_ylabel('')
fig.suptitle('Citation count Vs Rating', y=1.1)
ax1.set_title('SB-2017')
ax2.set_title('DB-2018')
ax3.set_title('DB-2019')
ax1.set_xlabel('')
ax2.set_xlabel('Rating', fontdict={'fontsize': 12})
ax3.set_xlabel('')
ax1.set_ylim([0, 820])
ax2.set_ylim([0, 800])
ax3.set_ylim([0, 800])
plt.savefig("citation vs Rating.png", bbox_inches="tight")
plt.show()
"""**Citation Count VS Paper Decision**"""
def _rank_by_avg_rating(per_paper):
    """Return ``(papers, ranked)`` with an ``avg_rating`` column per paper.

    ``papers`` keeps title, decision, avg_rating and cited_by_count.
    ``ranked`` is the same data sorted by ascending ``avg_rating`` with a
    1-based ``ID`` column giving the paper's position in that order.

    The input is copied first so the column assignments never write to a
    slice of another frame.

    NOTE(review): ``np.average(scores, weights=scores)`` weights every score
    by itself (sum(s**2) / sum(s)), which is not the arithmetic mean. Kept
    as-is to preserve the published numbers; confirm this is intended.
    """
    papers = per_paper.copy()
    papers['rating_score'] = papers['rating_score'].apply(
        lambda scores: [int(score) for score in scores])
    papers['avg_rating'] = papers['rating_score'].apply(
        lambda scores: np.average(scores, weights=scores))
    papers = papers[['title', 'decision', 'avg_rating', 'cited_by_count']]

    ranked = (papers.sort_values(by='avg_rating', ascending=True)
              .reset_index(drop=True)
              .reset_index()
              .rename(columns={'index': 'ID'}))
    ranked['ID'] = ranked['ID'] + 1  # positions start at 1, not 0
    return papers, ranked


cit_17_rev, cit_17_sorted = _rank_by_avg_rating(cit_17_rev)
cit_18_rev, cit_18_sorted = _rank_by_avg_rating(cit_18_rev)
cit_19_rev, cit_19_sorted = _rank_by_avg_rating(cit_19_rev)

# Leftover notebook inspection cells: evaluated but not displayed in a script.
cit_19_sorted.head()
cit_18_sorted[(cit_18_sorted.cited_by_count == 0) & (cit_18_sorted.decision == 'Accept')]
# Scatter of citation count against rating rank, coloured by decision.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(14, 4))
order = ['Accept', 'Reject']
palette = ["#ffcc00", "#993300"]

for _panel, _ranked in ((ax1, cit_17_sorted), (ax2, cit_18_sorted), (ax3, cit_19_sorted)):
    sns.scatterplot(data=_ranked, x="ID", y="cited_by_count", hue='decision',
                    ax=_panel, alpha=.7, palette=palette, hue_order=order)

# The outer panels drop their legends; the centre legend is re-placed below.
ax1.legend_.remove()
ax3.legend_.remove()

for _panel, _title in zip((ax1, ax2, ax3), ('SB-2017', 'DB-2018', 'DB-2019')):
    _panel.set_title(_title)
    _panel.set_ylim([0, 10000])

ax1.set_ylabel('Citation count', fontdict={'fontsize': 12})
ax2.set_ylabel('')
ax3.set_ylabel('')
ax1.set_xlabel('')
ax3.set_xlabel('')
fig.suptitle('Citation Count VS Paper Decision', y=1.1)

# Legend sits under the centre panel; the leading newlines in the x label
# push the label text down below it.
ax2.legend(loc="upper center", bbox_to_anchor=(0.5, -.08), ncol=2, fontsize=12)
ax2.set_xlabel('\n\n\npapers', fontdict={'fontsize': 12})

plt.savefig("citation_vs_decision.png", bbox_inches="tight")
plt.show()
# Non-numeric markers that can appear in ``academic_age`` instead of a number
# (presumably written by the author look-up step; confirm against the data
# collection code). The original filtered 'only informal' for 2018 only; the
# same list is now applied to every year so a stray marker cannot reach the
# numeric comparisons in get_age_labels.
_AGE_PLACEHOLDERS = ['skipped', 'no_match', 'no match', 'only informal']


def _authors_with_age(merged):
    """Return one row per (paper, author age), without placeholder ages."""
    per_author = merged[['title', 'decision', 'cited_by_count', 'academic_age']]
    per_author = per_author.explode('academic_age')
    has_age = ~per_author['academic_age'].isin(_AGE_PLACEHOLDERS)
    return per_author[has_age].reset_index(drop=True)


cit_17_auth = _authors_with_age(cit_17)
cit_18_auth = _authors_with_age(cit_18)
cit_19_auth = _authors_with_age(cit_19)
def get_age_labels(age):
    """Map an academic age to a seniority label.

    Returns 'Junior' for ``age <= 3``, 'Intermediate' for ``3 < age <= 10``
    and 'Senior' for ``age > 10``. Returns ``None`` when none of the
    comparisons holds, which is the case for NaN.
    """
    if age <= 3:
        return 'Junior'
    elif age > 3 and age <= 10:  # keep
        return 'Intermediate'
    elif age > 10:
        return 'Senior'
    # Reached only when every comparison is False (e.g. NaN).
    return None
get_age_labels(15)  # leftover notebook sanity check; the result is discarded

# Label every author row with the seniority category of its academic age.
for _authors in (cit_17_auth, cit_18_auth, cit_19_auth):
    _authors['author_category'] = _authors['academic_age'].apply(get_age_labels)


def _highly_cited(authors, min_citations=100):
    """Return the author rows whose paper has at least ``min_citations``."""
    return authors[authors.cited_by_count >= min_citations]


def _category_share(authors, min_citations=100):
    """Percentage of highly cited author rows falling in each category.

    The denominator is every highly cited row. Rows with a missing category
    are counted in the denominator but form no group of their own, because
    ``groupby`` drops missing keys.
    """
    cited = _highly_cited(authors, min_citations)
    share = cited.groupby(['author_category']).size() / len(cited) * 100
    return share.reset_index().rename({0: 'percentage_count'}, axis=1)


# Number of author rows attached to papers with >= 100 citations. The 2019
# value was a bare expression (shown only in a notebook); it is printed now
# so all three years are reported when run as a script.
print(_highly_cited(cit_17_auth).shape)
print(_highly_cited(cit_18_auth).shape)
print(_highly_cited(cit_19_auth).shape)

# Number of distinct papers with >= 100 citations. The columns are selected
# with a list: selecting them with a bare tuple raises on pandas >= 2.0.
for _authors in (cit_17_auth, cit_18_auth, cit_19_auth):
    print(_highly_cited(_authors)
          .groupby(['title', 'decision'])[['academic_age', 'author_category']]
          .agg(list)
          .reset_index()
          .shape)

cit_17_auth.head()  # notebook inspection cell; no effect in a script

ci_17_100 = _category_share(cit_17_auth)
ci_18_100 = _category_share(cit_18_auth)
ci_19_100 = _category_share(cit_19_auth)
"""**Average citation count with Juniors as First Author**"""
# get the combinations of junior and rest of the categories
def get_comb(cat_list):
    """Classify the author-category mix of a paper whose first author is junior.

    ``cat_list`` is the list of author categories of one paper, first author
    first. Returns:

    * 'NA' when the list is empty, the first author is not 'Junior', or the
      junior first author is the only author;
    * 'junior and junior' when every author is 'Junior';
    * 'junior and senior' when a 'Senior' is present but no 'Intermediate';
    * 'junior and intermediate' when an 'Intermediate' is present but no
      'Senior';
    * 'junior,intermediate,senior' when both are present;
    * ``None`` when the co-authors are neither all 'Junior' nor include a
      'Senior'/'Intermediate' (e.g. a co-author category is missing).
    """
    if len(cat_list) == 0 or cat_list[0] != 'Junior':
        return 'NA'
    if len(cat_list) == 1:
        return 'NA'  # only a single junior author

    categories = set(cat_list)
    if categories == {'Junior'}:
        return 'junior and junior'  # juniors combining with juniors

    has_senior = 'Senior' in categories
    has_intermediate = 'Intermediate' in categories
    if has_senior and has_intermediate:
        return 'junior,intermediate,senior'  # at least one senior and one intermediate
    if has_senior:
        return 'junior and senior'  # at least one senior, no intermediate
    if has_intermediate:
        return 'junior and intermediate'  # at least one intermediate, no senior

    # Disabled in the original: separate labels for senior-led and
    # intermediate-led papers ('senior and Intermediate',
    # 'Intermediate and senior'); those papers currently fall under 'NA'.

    # Unclassifiable mix (e.g. ['Junior', None]); callers filter on null.
    return None
def _junior_led_papers(authors):
    """Return one row per classifiable paper whose first author is junior.

    Author rows are regrouped into one row per paper (lists of ages and
    categories), each paper is labelled with ``get_comb`` and every paper
    labelled 'NA' or left unlabelled (null) is dropped.

    The original dropped null labels for 2017 only, so unclassifiable papers
    stayed in the 2018/2019 percentage denominators below. The same filter is
    now applied to every year.
    """
    papers = (authors.groupby(['title', 'decision', 'cited_by_count'])
              [['academic_age', 'author_category']]
              .agg(list)
              .reset_index())
    papers = papers[['title', 'decision', 'academic_age', 'cited_by_count', 'author_category']]
    papers = papers.assign(Combination=papers.author_category.apply(get_comb))
    classified = papers.Combination.notnull() & (papers.Combination != 'NA')
    return papers[classified].reset_index(drop=True)


def _combination_share(papers, min_citations=50):
    """Percentage of papers with >= ``min_citations`` per author combination."""
    cited = papers[papers.cited_by_count >= min_citations]
    share = cited.groupby(['Combination']).size() / len(cited) * 100
    return share.reset_index().rename({0: 'percentage_count'}, axis=1)


# Disabled in the original: an extra 'is_FA_junior' pre-filter and an
# ``isin(order)`` restriction on the combination labels.
cit_17_auth_a = _junior_led_papers(cit_17_auth)
cit_18_auth_a = _junior_led_papers(cit_18_auth)
cit_19_auth_a = _junior_led_papers(cit_19_auth)

# Mean citation count per author combination.
comb_17 = cit_17_auth_a.groupby('Combination')['cited_by_count'].mean().reset_index()
comb_18 = cit_18_auth_a.groupby('Combination')['cited_by_count'].mean().reset_index()
comb_19 = cit_19_auth_a.groupby('Combination')['cited_by_count'].mean().reset_index()

# NOTE(review): these names (and the plot titles that use them) say 100, but
# the threshold applied here is 50 citations. Confirm which is intended.
ci_17_100_comb = _combination_share(cit_17_auth_a)
ci_18_100_comb = _combination_share(cit_18_auth_a)
ci_19_100_comb = _combination_share(cit_19_auth_a)
def check_fa_all(auth_cat_list):
    """Return the first-author label for a paper's list of author categories.

    Only the first entry of ``auth_cat_list`` is inspected: 'Junior' maps to
    'FA-Junior', 'Intermediate' to 'FA-Intermediate' and anything else
    (including a missing category) to 'FA-Senior'. Returns ``None`` for an
    empty input.

    The original expressed this as a loop whose every branch returned on the
    first iteration; the first-element lookup is now explicit.
    """
    missing = object()
    first_author = next(iter(auth_cat_list), missing)
    if first_author is missing:
        return None
    if first_author == 'Junior':
        return 'FA-Junior'
    if first_author == 'Intermediate':
        return 'FA-Intermediate'
    # NOTE(review): every other value, not just 'Senior', lands here.
    return 'FA-Senior'
def _first_author_papers(authors):
    """Return one row per paper with the category label of its first author.

    Author rows are regrouped per paper (lists of ages and categories) and
    ``check_fa_all`` turns each category list into an 'FA-...' label.
    """
    per_paper = (authors.groupby(['title', 'decision', 'cited_by_count'])
                 [['academic_age', 'author_category']]
                 .agg(list)
                 .reset_index())
    per_paper['fa_category'] = per_paper.author_category.apply(check_fa_all)
    return per_paper[['title', 'decision', 'cited_by_count', 'fa_category']]


cit_17_auth_b = _first_author_papers(cit_17_auth)
cit_18_auth_b = _first_author_papers(cit_18_auth)
cit_19_auth_b = _first_author_papers(cit_19_auth)
import matplotlib.patheffects as path_effects
def add_median_labels(ax, fmt='.1f'):
    """Write each box's median value on top of its median line.

    ``ax`` is the axes holding the box plot and ``fmt`` the format spec
    applied to the median value. Does nothing when the axes contains no
    'PathPatch' boxes or fewer lines than boxes; the original divided by
    zero in the first case.
    """
    lines = ax.get_lines()
    boxes = [child for child in ax.get_children() if type(child).__name__ == 'PathPatch']
    if not boxes:
        return
    lines_per_box = len(lines) // len(boxes)
    if lines_per_box < 1:
        return  # a zero slice step would raise ValueError
    # Assumes the median is the fifth line artist drawn for each box
    # (TODO confirm for the installed seaborn/matplotlib version).
    for median in lines[4:len(lines):lines_per_box]:
        x, y = (data.mean() for data in median.get_data())
        xdata = median.get_xdata()
        # choose value depending on horizontal or vertical plot orientation:
        # a median line with constant x belongs to a horizontal box plot
        value = x if (xdata[1] - xdata[0]) == 0 else y
        text = ax.text(x, y, f'{value:{fmt}}', ha='center', va='center',
                       fontweight='bold', color='white')
        # create median-colored border around white text for contrast
        text.set_path_effects([
            path_effects.Stroke(linewidth=3, foreground=median.get_color()),
            path_effects.Normal(),
        ])
"""**Average citation count for one author**"""
def _single_author_papers(authors):
    """Return the papers that have exactly one author row.

    Author rows are regrouped per paper, papers whose age list has length one
    are kept, and the one-element category list is unwrapped to its value.
    """
    per_paper = (authors.groupby(['title', 'decision', 'cited_by_count'])
                 [['academic_age', 'author_category']]
                 .agg(list)
                 .reset_index())
    per_paper['auth_len'] = per_paper.academic_age.apply(len)
    solo = per_paper[per_paper.auth_len == 1].reset_index(drop=True)
    solo['author_category'] = solo.author_category.apply(lambda categories: categories[0])
    return solo[['title', 'cited_by_count', 'author_category']]


cit_17_auth_one = _single_author_papers(cit_17_auth)
cit_18_auth_one = _single_author_papers(cit_18_auth)
cit_19_auth_one = _single_author_papers(cit_19_auth)
# Average citation count per author category: first-author category on the
# top row, single-author papers on the bottom row, one column per year.
hue_order = ['Senior', 'Intermediate', 'Junior']
order_fa = ['FA-Senior', 'FA-Intermediate', 'FA-Junior']
fig, ax = plt.subplots(2, 3, figsize=(12, 7), sharey=True)
order = ['junior,intermediate,senior', 'junior and senior', 'junior and intermediate',
         'junior and junior']

for _col, _frame in enumerate((cit_17_auth_one, cit_18_auth_one, cit_19_auth_one)):
    sns.barplot(data=_frame, x='author_category', y='cited_by_count',
                order=hue_order, ax=ax[1, _col], errorbar=None)
for _col, _frame in enumerate((cit_17_auth_b, cit_18_auth_b, cit_19_auth_b)):
    sns.barplot(data=_frame, x='fa_category', y='cited_by_count',
                order=order_fa, ax=ax[0, _col], errorbar=None)
# Disabled in the original: a third row plotting comb_17/18/19 per
# combination on ax[2, *], which does not exist in this 2x3 grid.
fig.tight_layout()

# Clear the labels seaborn derived from the column names, then label only
# the left column (y) and the centre column (x).
for _panel in ax.flat:
    _panel.set_xlabel('')
    _panel.set_ylabel('')
# NOTE(review): the top row plots cit_*_auth_b, which is built from every
# paper in cit_*_auth; confirm the 'Multiple Authors' caption is accurate.
ax[0, 1].set_xlabel('Multiple Authors', fontdict={'fontsize': 12})
ax[1, 1].set_xlabel('Single Author', fontdict={'fontsize': 12})
ax[0, 0].set_ylabel('\n Average Citation', fontdict={'fontsize': 12})
ax[1, 0].set_ylabel('\n Average Citation', fontdict={'fontsize': 12})

# Print the rounded bar height above every bar.
for _panel in ax.flat:
    for container in _panel.containers:
        _panel.bar_label(container, fmt='%.0f', weight='bold')

# Title typo fixed ('actegory' -> 'category').
fig.suptitle('Author category Statistics', y=1.05)
ax[0, 0].set_title('SB-2017')
ax[0, 1].set_title('DB-2018')
ax[0, 2].set_title('DB-2019')
plt.savefig("avg_citation_single_multi.png", bbox_inches='tight')
fig.show()
# Share of highly cited junior-led papers per author combination, one
# horizontal bar chart per year.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 5.5))
order = ['junior,intermediate,senior', 'junior and senior', 'junior and intermediate', 'junior and junior']

sns.barplot(data=ci_17_100_comb, x='percentage_count', y='Combination', ax=ax1,
            palette='rocket', order=order)
# The category tick labels are shown on the left panel only.
for _panel, _share in ((ax2, ci_18_100_comb), (ax3, ci_19_100_comb)):
    sns.barplot(data=_share, x='percentage_count', y='Combination', ax=_panel,
                palette='rocket', order=order).set(yticklabels=[])

for _panel in (ax1, ax2, ax3):
    for container in _panel.containers:
        _panel.bar_label(container, fmt='%.1f', weight='bold', fontsize=12)
    _panel.set_xlim([0, 100])

# Resize the existing tick labels without re-assigning their text.
ax1.tick_params(axis='y', labelsize=12)
ax2.set_ylabel('')
ax3.set_ylabel('')
ax1.set_xlabel('')
ax3.set_xlabel('')
ax2.set_xlabel('Percentage count', fontdict={'fontsize': 13})
ax1.set_ylabel('Collaboration', fontdict={'fontsize': 13})
# NOTE(review): the title says >100 citations, but ci_*_100_comb is computed
# with a >= 50 threshold where it is defined. Confirm which is intended.
fig.suptitle('Participation of Authors in Papers with >100 Citation', y=1)
ax1.set_title('SB-2017')
ax2.set_title('DB-2018')
ax3.set_title('DB-2019')
fig.tight_layout()
# This figure used to be saved as "contribution of authors_citation.png",
# the same name the later 3x3 grid figure uses, so it was always overwritten.
# It now has its own file; the grid figure keeps the original name.
plt.savefig("author_combination_citation.png", bbox_inches="tight")
fig.show()
"""**Number of authors contributing to papers with more than 100 citation**"""
def _single_author_share(papers, min_citations=50):
    """Percentage of single-author papers with >= ``min_citations`` per category."""
    cited = papers[papers.cited_by_count >= min_citations]
    share = cited.groupby(['author_category']).size() / len(cited) * 100
    return share.reset_index().rename({0: 'percentage_count'}, axis=1)


# NOTE(review): the names and the figure title say 100, but the threshold
# applied to the single-author data here is 50. Confirm which is intended.
ci_17_100_one = _single_author_share(cit_17_auth_one)
ci_18_100_one = _single_author_share(cit_18_auth_one)
ci_19_100_one = _single_author_share(cit_19_auth_one)

# plot papers decision ratio
hue_order = ['Senior', 'Intermediate', 'Junior']
order_fa = ['FA-Senior', 'FA-Intermediate', 'FA-Junior']
order = ['junior,intermediate,senior', 'junior and senior', 'junior and intermediate',
         'junior and junior']
fig, ax = plt.subplots(3, 3, figsize=(18, 13))

# Rows 0 and 1: vertical bars per author category (single-author papers,
# then all author rows). Tick labels are kept on the first column only.
_category_rows = (
    (ci_17_100_one, ci_18_100_one, ci_19_100_one),
    (ci_17_100, ci_18_100, ci_19_100),
)
for _row, _shares in enumerate(_category_rows):
    for _col, _share in enumerate(_shares):
        _panel = sns.barplot(data=_share, x='author_category', y='percentage_count',
                             order=hue_order, ax=ax[_row, _col])
        if _col:
            _panel.set(yticklabels=[])

# Row 2: horizontal bars per author combination.
for _col, _share in enumerate((ci_17_100_comb, ci_18_100_comb, ci_19_100_comb)):
    _panel = sns.barplot(data=_share, x='percentage_count', y='Combination',
                         order=order, ax=ax[2, _col])
    if _col:
        _panel.set(yticklabels=[])
fig.tight_layout()

# Clear the labels seaborn derived from the column names; only the centre
# column carries a caption for its row.
for _panel in ax.flat:
    _panel.set_xlabel('')
    _panel.set_ylabel('')
ax[0, 1].set_xlabel(' One Author', fontdict={'fontsize': 15})
# NOTE(review): row 1 plots ci_*_100, which is computed over every author
# row, not only first authors. Confirm the 'First Author' caption.
ax[1, 1].set_xlabel(' First Author', fontdict={'fontsize': 15})
ax[2, 1].set_xlabel('\nAuthor Combination', fontdict={'fontsize': 15})

# Print the rounded percentage next to every bar.
for _panel in ax.flat:
    for container in _panel.containers:
        _panel.bar_label(container, fmt='%.0f', weight='bold', fontsize=12)

fig.suptitle('Percentage of Authors for papers with Citation > 100', y=1.05, fontsize=14)
ax[0, 0].set_title('SB-2017')
ax[0, 1].set_title('DB-2018')
ax[0, 2].set_title('DB-2019')

# Enlarge the first-column tick labels via tick_params. Re-assigning
# get_yticklabels() through set_yticklabels() freezes the label text on the
# numeric axes before set_ylim moves the ticks, leaving stale or empty labels.
for _row in range(3):
    ax[_row, 0].tick_params(axis='y', labelsize=16)

# Percent scale on the value axis of every panel.
for _col in range(3):
    ax[0, _col].set_ylim([0, 100])
    ax[1, _col].set_ylim([0, 100])
    ax[2, _col].set_xlim([0, 100])

plt.savefig("contribution of authors_citation.png", bbox_inches='tight')
fig.show()
GitHub Events
Total
Last Year
Dependencies
openreview-py/setup.py
pypi
- Deprecated *
- future *
- pycryptodome *
- pyjwt *
- pylatexenc *
- requests >=2.18.4
- setuptools ==65.5.1
- tld >=0.12
- tqdm *