Science Score: 18.0%
This score indicates how likely this project is to be science-related based on various indicators:
-
✓CITATION.cff file
Found CITATION.cff file -
○codemeta.json file
-
○.zenodo.json file
-
○DOI references
-
○Academic links in README
-
○Academic email domains
-
○Institutional organization owner
-
○JOSS paper metadata
-
○Scientific vocabulary similarity
Unable to calculate vocabulary similarity
Last synced: 10 months ago
·
JSON representation
·
Repository
phenetics
Basic Info
Statistics
- Stars: 0
- Watchers: 2
- Forks: 0
- Open Issues: 0
- Releases: 0
Created over 5 years ago
· Last pushed over 5 years ago
Metadata Files
Readme
Citation
Owner
- Name: Martin Galese
- Login: fros1y
- Kind: user
- Repositories: 8
- Profile: https://github.com/fros1y
Citation (Citation Extractor.ipynb)
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import psycopg2\n",
"from tqdm.auto import tqdm\n",
"from typing import Dict\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# The password is deliberately NOT part of the DSN: libpq resolves it from the\n",
"# PGPASSWORD environment variable or a matching ~/.pgpass entry.\n",
"# NOTE(review): a password was previously committed in this cell; treat it as\n",
"# leaked and rotate it.\n",
"conn = psycopg2.connect(\"dbname=patents user=martin host=localhost\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"cur = conn.cursor('citation_download')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"cur.execute(\"SELECT patent_id, edges FROM citation_adj where patent_id in (select patent_id from patents where country_code in ('US', 'EP', 'JP'));\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Stream the rows pending on `cur` to a text file, one patent per line:\n",
"# \"<patent_id> <edge> <edge> ...\" (ids separated by single spaces).\n",
"with open(\"/var/patentmark/citation_edges_eu_jp_us.txt\", \"w\") as outfile:\n",
"    for patent_id, edges in tqdm(cur):\n",
"        neighbours = ' '.join(map(str, edges))\n",
"        outfile.write(f\"{patent_id} {neighbours}\\n\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5a7dfa818bc9432b905386267b80a88a",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"HBox(children=(FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0), HTML(value=''…"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"# A psycopg2 named (server-side) cursor accepts only one execute() call, and\n",
"# `cur` may already have run the citation-edge query. This export therefore\n",
"# opens its own named cursor, which is closed when the `with` block exits.\n",
"with conn.cursor('citation_mapping_download') as mapping_cur:\n",
"    mapping_cur.execute(\"select patent_id, publication_number from patents;\")\n",
"    # One \"<patent_id>,<publication_number>\" record per line, no header row.\n",
"    with open(\"/var/patentmark/citation_mapping.csv\", \"w\") as outfile:\n",
"        for patent_id, publication_number in tqdm(mapping_cur):\n",
"            outfile.write(str(patent_id))\n",
"            outfile.write(\",\")\n",
"            outfile.write(publication_number)\n",
"            outfile.write(\"\\n\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"np_embeddings = np.fromfile(\"/var/patentmark/citations.verse.32d\", dtype=np.float32)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"id_lookup = pd.read_csv(\"/var/patentmark/citation_mapping.csv\", header=None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"id_lookup.columns = [\"patent_id\", \"publication_number\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Key the lookup table by patent_id so it can be joined on its index.\n",
"id_lookup = id_lookup.set_index(\"patent_id\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"id_lookup"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"np_embeddings = np_embeddings.reshape(-1,32)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# One object per row: a Series whose i-th element is the i-th row of\n",
"# `np_embeddings` as a 1-D array. Built directly from the rows instead of a\n",
"# row-wise DataFrame.apply, which makes a Python-level call per row.\n",
"embedding_frame = pd.Series(list(np_embeddings))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"embedding_frame.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# `embedding_frame` is a Series, so assigning `.columns` would only attach an\n",
"# unused attribute. Naming the Series is what carries the label through\n",
"# `.to_frame()`.\n",
"embedding_frame = embedding_frame.rename(\"citation_based_embedding\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"embeddings = embedding_frame.to_frame().join(id_lookup, how=\"inner\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Re-key the joined frame by publication_number (the column is consumed as the index).\n",
"embeddings = embeddings.set_index(\"publication_number\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"embeddings.columns=[\"citation_based_embedding\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"embeddings"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"embeddings.to_parquet(\"citation_embeddings.parquet\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}