Science Score: 31.0%

This score indicates how likely this project is to be science-related based on various indicators:

  • CITATION.cff file
    Found CITATION.cff file
  • codemeta.json file
    Found codemeta.json file
  • .zenodo.json file
  • DOI references
  • Academic publication links
  • Academic email domains
  • Institutional organization owner
  • JOSS paper metadata
  • Scientific vocabulary similarity
    Unable to calculate vocabulary similarity
Last synced: 10 months ago · JSON representation ·

Repository

Basic Info
  • Host: GitHub
  • Owner: edgarLan
  • Language: Jupyter Notebook
  • Default Branch: main
  • Size: 34.9 MB
Statistics
  • Stars: 0
  • Watchers: 1
  • Forks: 0
  • Open Issues: 0
  • Releases: 0
Created over 1 year ago · Last pushed about 1 year ago
Metadata Files
Readme Citation

README.md

brevetNLPv2

Owner

  • Login: edgarLan
  • Kind: user

Citation (citations/a3k_explo.ipynb)

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# pip install --use-pep517 alexandria3k"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from alexandria3k.data_sources import uspto"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "uspto_directory = \"C:/Users/edgar/OneDrive/Bureau/Ecole/HEC/A24/BrevetNLP/citations_data/patentsView\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<alexandria3k.data_sources.uspto.Uspto at 0x265e792be50>"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# uspto.Uspto(uspto_directory=uspto_directory)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# uspto.Uspto(uspto_directory=uspto_directory).download(data_location=uspto_directory)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "uspto_data = uspto.Uspto(uspto_directory)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# SQLite file that the later cells open with sqlite3.connect().\n",
    "database_path = \"uspto_data.db\"\n",
    "\n",
    "# Request every column of the usp_citations table.\n",
    "uspto_data.populate(\n",
    "    database_path,\n",
    "    columns=[\"usp_citations.*\"],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "   patent_id patcit_doc_number patcit_country patcit_kind patcit_date  \\\n",
      "0          0            D11495             US           S    18791100   \n",
      "1          0            D50715             US           S    19170500   \n",
      "2          0            D74119             US           S    19271200   \n",
      "3          0           D119611             US           S    19400300   \n",
      "4          0           3140954             US           A    19640700   \n",
      "\n",
      "            category  \n",
      "0     cited by other  \n",
      "1     cited by other  \n",
      "2  cited by examiner  \n",
      "3  cited by examiner  \n",
      "4     cited by other  \n"
     ]
    }
   ],
   "source": [
    "import sqlite3\n",
    "import pandas as pd\n",
    "\n",
    "# Read the citation columns back from the SQLite file at database_path.\n",
    "query = \"\"\"\n",
    "SELECT\n",
    "    patent_id, patcit_doc_number, patcit_country, patcit_kind, patcit_date, category\n",
    "FROM usp_citations\n",
    "\"\"\"\n",
    "\n",
    "conn = sqlite3.connect(database_path)\n",
    "try:\n",
    "    cursor = conn.cursor()\n",
    "    cursor.execute(query)\n",
    "    results = cursor.fetchall()\n",
    "    # Item 0 of each cursor.description entry is the column name.\n",
    "    columns = [description[0] for description in cursor.description]\n",
    "finally:\n",
    "    # Release the database handle even when the query raises.\n",
    "    conn.close()\n",
    "\n",
    "# Build the DataFrame from the fetched rows, labelled with the query's column names.\n",
    "df = pd.DataFrame(results, columns=columns)\n",
    "\n",
    "print(df.head())  # Preview the first rows only"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0         18791100\n",
       "1         19170500\n",
       "2         19271200\n",
       "3         19400300\n",
       "4         19640700\n",
       "            ...   \n",
       "196060    20070300\n",
       "196061    20070400\n",
       "196062    19881100\n",
       "196063    20020500\n",
       "196064        None\n",
       "Name: patcit_date, Length: 196065, dtype: object"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"patcit_date\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "usp_citations\n",
      "us_patents\n"
     ]
    }
   ],
   "source": [
    "# List the name of every table stored in the SQLite file at database_path.\n",
    "query = \"\"\"\n",
    "SELECT name FROM sqlite_master WHERE type='table';\n",
    "\"\"\"\n",
    "\n",
    "conn = sqlite3.connect(database_path)\n",
    "try:\n",
    "    cursor = conn.cursor()\n",
    "    cursor.execute(query)\n",
    "    tables = cursor.fetchall()\n",
    "finally:\n",
    "    # Release the database handle even when the query raises.\n",
    "    conn.close()\n",
    "\n",
    "# Each fetched row is a tuple; its first element is the table name.\n",
    "for table in tables:\n",
    "    print(table[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "database_path = \"uspto_data.db\"\n",
    "\n",
    "# Tables (all columns) requested from the data source.\n",
    "columns = [\"usp_citations.*\", \"us_patents.*\"]\n",
    "\n",
    "# Expression passed as populate()'s `condition`: it ties each citation row\n",
    "# to the us_patents row with the matching id.\n",
    "condition = \"\"\"\n",
    "usp_citations.patent_id = us_patents.id\n",
    "\"\"\"\n",
    "\n",
    "uspto_data.populate(\n",
    "    database_path,\n",
    "    columns=columns,\n",
    "    condition=condition,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sqlite3\n",
    "import pandas as pd\n",
    "\n",
    "# Pair every citation row with date_published from the us_patents row it joins to.\n",
    "# NOTE: each selected column needs its own comma. Without one, SQL reads\n",
    "# `date_published patcit_doc_number` as a column alias, so the result carries\n",
    "# the publication date under the patcit_doc_number header.\n",
    "query = \"\"\"\n",
    "SELECT\n",
    "    patent_id, patcit_doc_number, patcit_country, patcit_kind, patcit_date, category,\n",
    "    date_published\n",
    "FROM usp_citations\n",
    "JOIN us_patents ON usp_citations.patent_id = us_patents.id;\n",
    "\"\"\"\n",
    "\n",
    "conn = sqlite3.connect(database_path)\n",
    "try:\n",
    "    cursor = conn.cursor()\n",
    "    cursor.execute(query)\n",
    "    results = cursor.fetchall()\n",
    "    # Item 0 of each cursor.description entry is the column name.\n",
    "    columns = [description[0] for description in cursor.description]\n",
    "finally:\n",
    "    # Release the database handle even when the query raises.\n",
    "    conn.close()\n",
    "\n",
    "# Build the DataFrame from the fetched rows, labelled with the query's column names.\n",
    "df = pd.DataFrame(results, columns=columns)\n",
    "\n",
    "print(df.head())  # Preview the first rows only"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>patent_id</th>\n",
       "      <th>patcit_doc_number</th>\n",
       "      <th>patcit_country</th>\n",
       "      <th>patcit_kind</th>\n",
       "      <th>patcit_date</th>\n",
       "      <th>category</th>\n",
       "      <th>date_published</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>20120103</td>\n",
       "      <td>US</td>\n",
       "      <td>S</td>\n",
       "      <td>18791100</td>\n",
       "      <td>cited by other</td>\n",
       "      <td>20120103</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>20120103</td>\n",
       "      <td>US</td>\n",
       "      <td>S</td>\n",
       "      <td>19170500</td>\n",
       "      <td>cited by other</td>\n",
       "      <td>20120103</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>20120103</td>\n",
       "      <td>US</td>\n",
       "      <td>S</td>\n",
       "      <td>19271200</td>\n",
       "      <td>cited by examiner</td>\n",
       "      <td>20120103</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>20120103</td>\n",
       "      <td>US</td>\n",
       "      <td>S</td>\n",
       "      <td>19400300</td>\n",
       "      <td>cited by examiner</td>\n",
       "      <td>20120103</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>20120103</td>\n",
       "      <td>US</td>\n",
       "      <td>A</td>\n",
       "      <td>19640700</td>\n",
       "      <td>cited by other</td>\n",
       "      <td>20120103</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11733475</th>\n",
       "      <td>277283</td>\n",
       "      <td>20121225</td>\n",
       "      <td>JP</td>\n",
       "      <td>A</td>\n",
       "      <td>20010400</td>\n",
       "      <td>cited by other</td>\n",
       "      <td>20121225</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11733476</th>\n",
       "      <td>277283</td>\n",
       "      <td>20121225</td>\n",
       "      <td>JP</td>\n",
       "      <td>A</td>\n",
       "      <td>20051000</td>\n",
       "      <td>cited by other</td>\n",
       "      <td>20121225</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11733477</th>\n",
       "      <td>277283</td>\n",
       "      <td>20121225</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>cited by other</td>\n",
       "      <td>20121225</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11733478</th>\n",
       "      <td>277283</td>\n",
       "      <td>20121225</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>cited by other</td>\n",
       "      <td>20121225</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11733479</th>\n",
       "      <td>277283</td>\n",
       "      <td>20121225</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>cited by other</td>\n",
       "      <td>20121225</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>11733480 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          patent_id patcit_doc_number patcit_country patcit_kind patcit_date  \\\n",
       "0                 0          20120103             US           S    18791100   \n",
       "1                 0          20120103             US           S    19170500   \n",
       "2                 0          20120103             US           S    19271200   \n",
       "3                 0          20120103             US           S    19400300   \n",
       "4                 0          20120103             US           A    19640700   \n",
       "...             ...               ...            ...         ...         ...   \n",
       "11733475     277283          20121225             JP           A    20010400   \n",
       "11733476     277283          20121225             JP           A    20051000   \n",
       "11733477     277283          20121225           None        None        None   \n",
       "11733478     277283          20121225           None        None        None   \n",
       "11733479     277283          20121225           None        None        None   \n",
       "\n",
       "                   category date_published  \n",
       "0            cited by other       20120103  \n",
       "1            cited by other       20120103  \n",
       "2         cited by examiner       20120103  \n",
       "3         cited by examiner       20120103  \n",
       "4            cited by other       20120103  \n",
       "...                     ...            ...  \n",
       "11733475     cited by other       20121225  \n",
       "11733476     cited by other       20121225  \n",
       "11733477     cited by other       20121225  \n",
       "11733478     cited by other       20121225  \n",
       "11733479     cited by other       20121225  \n",
       "\n",
       "[11733480 rows x 7 columns]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "ename": "",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
      "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
      "\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
      "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
     ]
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(11733480, 7)"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>patcit_doc_number</th>\n",
       "      <th>patcit_country</th>\n",
       "      <th>patcit_kind</th>\n",
       "      <th>patcit_date</th>\n",
       "      <th>category</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>D11495</td>\n",
       "      <td>US</td>\n",
       "      <td>S</td>\n",
       "      <td>18791100</td>\n",
       "      <td>cited by other</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>D50715</td>\n",
       "      <td>US</td>\n",
       "      <td>S</td>\n",
       "      <td>19170500</td>\n",
       "      <td>cited by other</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>D74119</td>\n",
       "      <td>US</td>\n",
       "      <td>S</td>\n",
       "      <td>19271200</td>\n",
       "      <td>cited by examiner</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>D119611</td>\n",
       "      <td>US</td>\n",
       "      <td>S</td>\n",
       "      <td>19400300</td>\n",
       "      <td>cited by examiner</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>3140954</td>\n",
       "      <td>US</td>\n",
       "      <td>A</td>\n",
       "      <td>19640700</td>\n",
       "      <td>cited by other</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11733475</th>\n",
       "      <td>2001-108601</td>\n",
       "      <td>JP</td>\n",
       "      <td>A</td>\n",
       "      <td>20010400</td>\n",
       "      <td>cited by other</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11733476</th>\n",
       "      <td>2005-283538</td>\n",
       "      <td>JP</td>\n",
       "      <td>A</td>\n",
       "      <td>20051000</td>\n",
       "      <td>cited by other</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11733477</th>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>cited by other</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11733478</th>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>cited by other</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11733479</th>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>cited by other</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>11733480 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         patcit_doc_number patcit_country patcit_kind patcit_date  \\\n",
       "0                   D11495             US           S    18791100   \n",
       "1                   D50715             US           S    19170500   \n",
       "2                   D74119             US           S    19271200   \n",
       "3                  D119611             US           S    19400300   \n",
       "4                  3140954             US           A    19640700   \n",
       "...                    ...            ...         ...         ...   \n",
       "11733475       2001-108601             JP           A    20010400   \n",
       "11733476       2005-283538             JP           A    20051000   \n",
       "11733477              None           None        None        None   \n",
       "11733478              None           None        None        None   \n",
       "11733479              None           None        None        None   \n",
       "\n",
       "                   category  \n",
       "0            cited by other  \n",
       "1            cited by other  \n",
       "2         cited by examiner  \n",
       "3         cited by examiner  \n",
       "4            cited by other  \n",
       "...                     ...  \n",
       "11733475     cited by other  \n",
       "11733476     cited by other  \n",
       "11733477     cited by other  \n",
       "11733478     cited by other  \n",
       "11733479     cited by other  \n",
       "\n",
       "[11733480 rows x 5 columns]"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "0it [00:00, ?it/s]\n"
     ]
    },
    {
     "ename": "ValueError",
     "evalue": "not enough values to unpack (expected 1, got 0)",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[23], line 7\u001b[0m\n\u001b[0;32m      1\u001b[0m query \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[0;32m      2\u001b[0m \u001b[38;5;124mSELECT\u001b[39m\n\u001b[0;32m      3\u001b[0m \u001b[38;5;124m    patent_id, patcit_doc_number, patcit_country, patcit_kind, patcit_date, category\u001b[39m\n\u001b[0;32m      4\u001b[0m \u001b[38;5;124mFROM usp_citations\u001b[39m\n\u001b[0;32m      5\u001b[0m \u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[0;32m      6\u001b[0m i\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m\n\u001b[1;32m----> 7\u001b[0m \u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mrow\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mtqdm\u001b[49m\u001b[43m(\u001b[49m\u001b[43muspto_data\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquery\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[0;32m      8\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mprint\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mrow\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32mc:\\Users\\edgar\\OneDrive\\Bureau\\Ecole\\HEC\\A24\\BrevetNLP\\.conda\\Lib\\site-packages\\tqdm\\std.py:1181\u001b[0m, in \u001b[0;36mtqdm.__iter__\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m   1178\u001b[0m time \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_time\n\u001b[0;32m   1180\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m-> 1181\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43miterable\u001b[49m\u001b[43m:\u001b[49m\n\u001b[0;32m   1182\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43;01myield\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\n\u001b[0;32m   1183\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;66;43;03m# Update and possibly print the progressbar.\u001b[39;49;00m\n\u001b[0;32m   1184\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;66;43;03m# Note: does not call self.update(1) for speed optimisation.\u001b[39;49;00m\n",
      "File \u001b[1;32mc:\\Users\\edgar\\OneDrive\\Bureau\\Ecole\\HEC\\A24\\BrevetNLP\\.conda\\Lib\\site-packages\\alexandria3k\\data_source.py:706\u001b[0m, in \u001b[0;36mDataSource.query\u001b[1;34m(self, query, partition)\u001b[0m\n\u001b[0;32m    704\u001b[0m \u001b[38;5;66;03m# Easy case\u001b[39;00m\n\u001b[0;32m    705\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m partition:\n\u001b[1;32m--> 706\u001b[0m     \u001b[38;5;28;01myield from\u001b[39;00m \u001b[43mtry_sql_execute\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcursor\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    707\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[0;32m    709\u001b[0m \u001b[38;5;66;03m# Even when restricting multiple JOINs with container_id\u001b[39;00m\n\u001b[0;32m    710\u001b[0m \u001b[38;5;66;03m# SQLite seems to scan all containers for each JOIN making the\u001b[39;00m\n\u001b[0;32m    711\u001b[0m \u001b[38;5;66;03m# performance intolerably slow. Address this by creating non-virtual\u001b[39;00m\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    719\u001b[0m \u001b[38;5;66;03m#   Run query on in-memory database\u001b[39;00m\n\u001b[0;32m    720\u001b[0m \u001b[38;5;66;03m#   drop tables\u001b[39;00m\n",
      "File \u001b[1;32mc:\\Users\\edgar\\OneDrive\\Bureau\\Ecole\\HEC\\A24\\BrevetNLP\\.conda\\Lib\\site-packages\\alexandria3k\\common.py:200\u001b[0m, in \u001b[0;36mtry_sql_execute\u001b[1;34m(execution_context, statement)\u001b[0m\n\u001b[0;32m    187\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m    188\u001b[0m \u001b[38;5;124;03mReturn the result of executing the specified SQL statement.\u001b[39;00m\n\u001b[0;32m    189\u001b[0m \u001b[38;5;124;03mThe statement is logged through log_sql. If the satement's\u001b[39;00m\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    197\u001b[0m \u001b[38;5;124;03m:type statement: str\u001b[39;00m\n\u001b[0;32m    198\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m    199\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 200\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mexecution_context\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlog_sql\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstatement\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    201\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m apsw\u001b[38;5;241m.\u001b[39mSQLError \u001b[38;5;28;01mas\u001b[39;00m exception:\n\u001b[0;32m    202\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m Alexandria3kError(\n\u001b[0;32m    203\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSQL statement \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mstatement\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m failed: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mexception\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    204\u001b[0m     ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mexception\u001b[39;00m\n",
      "File \u001b[1;32mD:\\a\\apsw\\apsw\\src\\vtable.c:2350\u001b[0m, in \u001b[0;36mVirtualTable.xFilter\u001b[1;34m()\u001b[0m\n",
      "File \u001b[1;32mc:\\Users\\edgar\\OneDrive\\Bureau\\Ecole\\HEC\\A24\\BrevetNLP\\.conda\\Lib\\site-packages\\alexandria3k\\data_source.py:220\u001b[0m, in \u001b[0;36mElementsCursor.Filter\u001b[1;34m(self, *args)\u001b[0m\n\u001b[0;32m    217\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mFilter\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs):\n\u001b[0;32m    218\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"Always called first to initialize an iteration to the first row\u001b[39;00m\n\u001b[0;32m    219\u001b[0m \u001b[38;5;124;03m    of the table\"\"\"\u001b[39;00m\n\u001b[1;32m--> 220\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mparent_cursor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mFilter\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    221\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39melements \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m    222\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mNext()\n",
      "File \u001b[1;32mc:\\Users\\edgar\\OneDrive\\Bureau\\Ecole\\HEC\\A24\\BrevetNLP\\.conda\\Lib\\site-packages\\alexandria3k\\data_sources\\uspto.py:369\u001b[0m, in \u001b[0;36mPatentsCursor.Filter\u001b[1;34m(self, index_number, _index_name, constraint_args)\u001b[0m\n\u001b[0;32m    366\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mFilter\u001b[39m(\u001b[38;5;28mself\u001b[39m, index_number, _index_name, constraint_args):\n\u001b[0;32m    367\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"Always called first to initialize an iteration to the first row\u001b[39;00m\n\u001b[0;32m    368\u001b[0m \u001b[38;5;124;03m    of the table according to the index\"\"\"\u001b[39;00m\n\u001b[1;32m--> 369\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfiles_cursor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mFilter\u001b[49m\u001b[43m(\u001b[49m\u001b[43mindex_number\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_index_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconstraint_args\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    370\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39meof \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfiles_cursor\u001b[38;5;241m.\u001b[39mEof()\n\u001b[0;32m    371\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m index_number \u001b[38;5;241m&\u001b[39m ROWID_INDEX:\n\u001b[0;32m    372\u001b[0m         \u001b[38;5;66;03m# This has never happened, so this is untested\u001b[39;00m\n",
      "File \u001b[1;32mc:\\Users\\edgar\\OneDrive\\Bureau\\Ecole\\HEC\\A24\\BrevetNLP\\.conda\\Lib\\site-packages\\alexandria3k\\data_sources\\uspto.py:260\u001b[0m, in \u001b[0;36mPatentsFilesCursor.Filter\u001b[1;34m(self, index_number, _index_name, constraint_args)\u001b[0m\n\u001b[0;32m    256\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontainer_id \u001b[38;5;241m=\u001b[39m constraint_args[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m-\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[0;32m    257\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcurrent_file_path \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m    258\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtable\u001b[38;5;241m.\u001b[39mdata_source\u001b[38;5;241m.\u001b[39mget_current_zip_path()\n\u001b[0;32m    259\u001b[0m     )\n\u001b[1;32m--> 260\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mNext\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32mc:\\Users\\edgar\\OneDrive\\Bureau\\Ecole\\HEC\\A24\\BrevetNLP\\.conda\\Lib\\site-packages\\alexandria3k\\data_sources\\uspto.py:270\u001b[0m, in \u001b[0;36mPatentsFilesCursor.Next\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m    268\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontainer_id \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[0;32m    269\u001b[0m \u001b[38;5;66;03m# Zip file read.\u001b[39;00m\n\u001b[1;32m--> 270\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mxml_contents \u001b[38;5;241m=\u001b[39m \u001b[43mget_zip_cache\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m    271\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcurrent_file_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtable\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdata_source\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msample\u001b[49m\n\u001b[0;32m    272\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    274\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontainer_id \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mxml_contents):\n\u001b[0;32m    275\u001b[0m     \u001b[38;5;66;03m# Zip file ended.\u001b[39;00m\n\u001b[0;32m    276\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[0;32m    277\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtable\u001b[38;5;241m.\u001b[39mdata_source\u001b[38;5;241m.\u001b[39mlength_of_zip_files()\n\u001b[0;32m    278\u001b[0m         \u001b[38;5;241m>\u001b[39m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mzip_index \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[0;32m    279\u001b[0m     ):\n\u001b[0;32m    280\u001b[0m         \u001b[38;5;66;03m# Moving to the next available Zip file.\u001b[39;00m\n\u001b[0;32m    281\u001b[0m         \u001b[38;5;66;03m# Updating new container id.\u001b[39;00m\n",
      "File \u001b[1;32mc:\\Users\\edgar\\OneDrive\\Bureau\\Ecole\\HEC\\A24\\BrevetNLP\\.conda\\Lib\\site-packages\\alexandria3k\\uspto_zip_cache.py:58\u001b[0m, in \u001b[0;36mUsptoZipCache.read\u001b[1;34m(self, zip_path, sampling)\u001b[0m\n\u001b[0;32m     53\u001b[0m xml_file \u001b[38;5;241m=\u001b[39m [\n\u001b[0;32m     54\u001b[0m     file \u001b[38;5;28;01mfor\u001b[39;00m file \u001b[38;5;129;01min\u001b[39;00m zip_ref\u001b[38;5;241m.\u001b[39mnamelist() \u001b[38;5;28;01mif\u001b[39;00m file\u001b[38;5;241m.\u001b[39mendswith(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.xml\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m     55\u001b[0m ]\n\u001b[0;32m     57\u001b[0m \u001b[38;5;66;03m# Extract filename and decoding the XML\u001b[39;00m\n\u001b[1;32m---> 58\u001b[0m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfile_name,) \u001b[38;5;241m=\u001b[39m xml_file\n\u001b[0;32m     59\u001b[0m xml_content \u001b[38;5;241m=\u001b[39m zip_ref\u001b[38;5;241m.\u001b[39mread(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfile_name)\u001b[38;5;241m.\u001b[39mdecode(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mutf-8\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m     61\u001b[0m \u001b[38;5;66;03m# The first item of the list is None.\u001b[39;00m\n",
      "\u001b[1;31mValueError\u001b[0m: not enough values to unpack (expected 1, got 0)"
     ]
    }
   ],
   "source": [
    "query = \"\"\"\n",
    "SELECT\n",
    "    patent_id, patcit_doc_number, patcit_country, patcit_kind, patcit_date, category\n",
    "FROM usp_citations\n",
    "\"\"\"\n",
    "i=0\n",
    "for row in tqdm(uspto_data.query(query)):\n",
    "    print(row)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "0it [00:00, ?it/s]\n"
     ]
    },
    {
     "ename": "ConnectionClosedError",
     "evalue": "The connection has been closed",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mConnectionClosedError\u001b[0m                     Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[38], line 7\u001b[0m\n\u001b[0;32m      1\u001b[0m query \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[0;32m      2\u001b[0m \u001b[38;5;124mSELECT\u001b[39m\n\u001b[0;32m      3\u001b[0m \u001b[38;5;124m    patent_id, patcit_doc_number, patcit_country, patcit_kind, patcit_date, category\u001b[39m\n\u001b[0;32m      4\u001b[0m \u001b[38;5;124mFROM usp_citations\u001b[39m\n\u001b[0;32m      5\u001b[0m \u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[0;32m      6\u001b[0m i\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m\n\u001b[1;32m----> 7\u001b[0m \u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mrow\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mtqdm\u001b[49m\u001b[43m(\u001b[49m\u001b[43muspto_data\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquery\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[0;32m      8\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43mprint\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mrow\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[1;32mc:\\Users\\edgar\\OneDrive\\Bureau\\Ecole\\HEC\\A24\\BrevetNLP\\.conda\\Lib\\site-packages\\tqdm\\std.py:1181\u001b[0m, in \u001b[0;36mtqdm.__iter__\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m   1178\u001b[0m time \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_time\n\u001b[0;32m   1180\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m-> 1181\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43miterable\u001b[49m\u001b[43m:\u001b[49m\n\u001b[0;32m   1182\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43;01myield\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\n\u001b[0;32m   1183\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;66;43;03m# Update and possibly print the progressbar.\u001b[39;49;00m\n\u001b[0;32m   1184\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;66;43;03m# Note: does not call self.update(1) for speed optimisation.\u001b[39;49;00m\n",
      "File \u001b[1;32mc:\\Users\\edgar\\OneDrive\\Bureau\\Ecole\\HEC\\A24\\BrevetNLP\\.conda\\Lib\\site-packages\\alexandria3k\\data_source.py:702\u001b[0m, in \u001b[0;36mDataSource.query\u001b[1;34m(self, query, partition)\u001b[0m\n\u001b[0;32m    675\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mquery\u001b[39m(\u001b[38;5;28mself\u001b[39m, query, partition\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m):\n\u001b[0;32m    676\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m    677\u001b[0m \u001b[38;5;124;03m    Run the specified query on the virtual database using the data\u001b[39;00m\n\u001b[0;32m    678\u001b[0m \u001b[38;5;124;03m    specified in the object constructor's call.\u001b[39;00m\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    699\u001b[0m \u001b[38;5;124;03m    :rtype: iterable\u001b[39;00m\n\u001b[0;32m    700\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[1;32m--> 702\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcursor \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvdb\u001b[38;5;241m.\u001b[39mcursor()\n\u001b[0;32m    704\u001b[0m     \u001b[38;5;66;03m# Easy case\u001b[39;00m\n\u001b[0;32m    705\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m partition:\n",
      "\u001b[1;31mConnectionClosedError\u001b[0m: The connection has been closed"
     ]
    }
   ],
   "source": [
    "query = \"\"\"\n",
    "SELECT\n",
    "    patent_id, patcit_doc_number, patcit_country, patcit_kind, patcit_date, category\n",
    "FROM usp_citations\n",
    "\"\"\"\n",
    "i=0\n",
    "for row in tqdm(uspto_data.query(query)):\n",
    "    print(row)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}

GitHub Events

Total
  • Push event: 32
  • Create event: 2
Last Year
  • Push event: 32
  • Create event: 2