{"id":31111,"name":"edspdf","description":"EDS-PDF is a generic, pure-Python framework for text extraction from PDF documents. It provides the machinery to use rule- or machine-learning-based approaches to classify text blocs between body and meta-data.","url":"https://github.com/aphp/edspdf","last_synced_at":"2025-09-05T05:03:35.551Z","repository":{"id":49501682,"uuid":"517726737","full_name":"aphp/edspdf","owner":"aphp","description":"EDS-PDF is a generic, pure-Python framework for text extraction from PDF documents. It provides the machinery to use rule- or machine-learning-based approaches to classify text blocs between body and meta-data.","archived":false,"fork":false,"pushed_at":"2025-02-12T14:12:06.000Z","size":9359,"stargazers_count":51,"open_issues_count":0,"forks_count":7,"subscribers_count":2,"default_branch":"main","last_synced_at":"2025-08-30T21:17:33.030Z","etag":null,"topics":["extraction","machine-learning","pdf"],"latest_commit_sha":null,"homepage":"https://aphp.github.io/edspdf/","language":"Python","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"bsd-3-clause","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/aphp.png","metadata":{"files":{"readme":"README.md","changelog":"changelog.md","contributing":"contributing.md","funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":"CITATION.cff","codeowners":null,"security":null,"support":null,"governance":null,"roadmap":"docs/roadmap.md","authors":null,"dei":null,"publiccode":null,"codemeta":null,"zenodo":null}},"created_at":"2022-07-25T15:47:09.000Z","updated_at":"2025-06-30T10:14:05.000Z","dependencies_parsed_at":"2024-02-07T05:45:17.330Z","dependency_job_id":"4c9da124-9356-4fa9-9c75-b3bd8706f386","html_url":"https://github.com/aphp/edspdf","commit_stats":{"total_commits":293,"total_committers":6,"mean_commits":"48.833333333333336","dds":0.4300341296928327,"last_synced_commit":"5778c0e1ec01c9a0835832938cc3b49b5d051ffc"},"previous_names":[],"tags_count":12,"template":false,"template_full_name":null,"purl":"pkg:github/aphp/edspdf","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/aphp%2Fedspdf","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/aphp%2Fedspdf/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/aphp%2Fedspdf/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/aphp%2Fedspdf/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/aphp","download_url":"https://codeload.github.com/aphp/edspdf/tar.gz/refs/heads/main","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/aphp%2Fedspdf/sbom","scorecard":null,"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":273713621,"owners_count":25154614,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","status":"online","status_checked_at":"2025-09-05T02:00:09.113Z","response_time":402,"last_error":null,"robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":true,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"owner":{"login":"aphp","name":"Greater Paris University Hospitals (AP-HP)","uuid":"26802401","kind":"organization","description":"","email":null,"website":"https://www.aphp.fr/","location":"Paris","twitter":null,"company":null,"icon_url":"https://avatars.githubusercontent.com/u/26802401?v=4","repositories_count":35,"last_synced_at":"2025-04-13T00:03:05.668Z","metadata":{"has_sponsors_listing":false},"html_url":"https://github.com/aphp","funding_links":[],"total_stars":451,"followers":61,"following":0,"created_at":"2022-11-14T09:30:39.731Z","updated_at":"2025-04-13T00:03:05.668Z","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/aphp","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/aphp/repositories"},"packages":null,"commits":{"id":684809,"full_name":"aphp/edspdf","default_branch":"main","committers":[{"name":"Perceval Wajsbürt","email":"perceval.wajsburt-ext@aphp.fr","login":"percevalw","count":172},{"name":"Basile Dura","email":"basile.dura-ext@aphp.fr","login":null,"count":75},{"name":"Basile Dura","email":"basile@bdura.me","login":"bdura","count":48},{"name":"acalliger","email":"alice.calliger@gmail.com","login":"acalliger","count":1},{"name":"Ian Fox","email":"iansfox@pm.me","login":"ian-fox","count":1},{"name":"alice.calliger","email":"alice.calliger-ext@aphp.fr","login":null,"count":1}],"total_commits":298,"total_committers":6,"total_bot_commits":0,"total_bot_committers":0,"mean_commits":49.666666666666664,"dds":0.42281879194630867,"past_year_committers":[{"name":"Perceval Wajsbürt","email":"perceval.wajsburt@aphp.fr","login":"percevalw","count":12}],"past_year_total_commits":12,"past_year_total_committers":1,"past_year_total_bot_commits":0,"past_year_total_bot_committers":0,"past_year_mean_commits":12.0,"past_year_dds":0.0,"last_synced_at":"2025-05-23T17:17:31.270Z","last_synced_commit":"7177a29534181686d035c85fa667784e7d930482","created_at":"2023-03-09T11:06:14.150Z","updated_at":"2025-05-23T17:17:31.302Z","commits_url":"https://commits.ecosyste.ms/api/v1/hosts/GitHub/repositories/aphp%2Fedspdf/commits","host":{"name":"GitHub","url":"https://github.com","kind":"github","last_synced_at":"2025-08-26T00:00:11.761Z","repositories_count":5479765,"commits_count":853308191,"contributors_count":31095157,"owners_count":906558,"icon_url":"https://github.com/github.png","host_url":"https://commits.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://commits.ecosyste.ms/api/v1/hosts/GitHub/repositories"}},"issues_stats":{"full_name":"aphp/edspdf","html_url":"https://github.com/aphp/edspdf","last_synced_at":"2025-08-31T16:49:25.511Z","status":null,"issues_count":3,"pull_requests_count":31,"avg_time_to_close_issue":1945783.6666666667,"avg_time_to_close_pull_request":948631.1,"issues_closed_count":3,"pull_requests_closed_count":30,"pull_request_authors_count":4,"issue_authors_count":3,"avg_comments_per_issue":0.6666666666666666,"avg_comments_per_pull_request":1.032258064516129,"merged_pull_requests_count":29,"bot_issues_count":0,"bot_pull_requests_count":0,"past_year_issues_count":1,"past_year_pull_requests_count":5,"past_year_avg_time_to_close_issue":177672.0,"past_year_avg_time_to_close_pull_request":22879.25,"past_year_issues_closed_count":1,"past_year_pull_requests_closed_count":4,"past_year_pull_request_authors_count":1,"past_year_issue_authors_count":1,"past_year_avg_comments_per_issue":1.0,"past_year_avg_comments_per_pull_request":2.0,"past_year_bot_issues_count":0,"past_year_bot_pull_requests_count":0,"past_year_merged_pull_requests_count":4,"created_at":"2023-05-15T04:19:13.586Z","updated_at":"2025-09-02T21:12:21.044Z","repository_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/repositories/aphp%2Fedspdf","issues_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/repositories/aphp%2Fedspdf/issues","issue_labels_count":{},"pull_request_labels_count":{},"issue_author_associations_count":{"MEMBER":1,"NONE":1,"CONTRIBUTOR":1},"pull_request_author_associations_count":{"MEMBER":20,"CONTRIBUTOR":8,"COLLABORATOR":5,"NONE":1},"issue_authors":{"bdura":1,"percevalw":1,"xav-hydra":1},"pull_request_authors":{"percevalw":23,"bdura":7,"acalliger":2,"ian-fox":1,"JeremyMelton":1},"host":{"name":"GitHub","url":"https://github.com","kind":"github","last_synced_at":"2025-09-05T00:00:10.444Z","repositories_count":10086361,"issues_count":31286210,"pull_requests_count":96411606,"authors_count":10695498,"icon_url":"https://github.com/github.png","host_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/repositories","owners_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/owners","authors_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors"},"past_year_issue_labels_count":{},"past_year_pull_request_labels_count":{},"past_year_issue_author_associations_count":{"NONE":1},"past_year_pull_request_author_associations_count":{"MEMBER":6},"past_year_issue_authors":{"xav-hydra":1},"past_year_pull_request_authors":{"percevalw":5,"JeremyMelton":1},"maintainers":[{"login":"percevalw","count":24,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/percevalw"},{"login":"acalliger","count":1,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/acalliger"},{"login":"JeremyMelton","count":1,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/JeremyMelton"}],"active_maintainers":[{"login":"percevalw","count":5,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/percevalw"},{"login":"JeremyMelton","count":1,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/JeremyMelton"}]},"events":{"total":{"CreateEvent":8,"ReleaseEvent":5,"IssuesEvent":2,"WatchEvent":12,"DeleteEvent":3,"IssueCommentEvent":17,"PushEvent":24,"PullRequestEvent":7,"ForkEvent":1},"last_year":{"CreateEvent":8,"ReleaseEvent":5,"IssuesEvent":2,"WatchEvent":12,"DeleteEvent":3,"IssueCommentEvent":17,"PushEvent":24,"PullRequestEvent":7,"ForkEvent":1}},"keywords":["extraction","machine-learning","pdf"],"dependencies":[{"ecosystem":"pypi","filepath":"demo/requirements.txt","sha":null,"kind":"manifest","created_at":"2022-09-26T21:51:28.812Z","updated_at":"2022-09-26T21:51:28.812Z","repository_link":"https://github.com/aphp/edspdf/blob/main/demo/requirements.txt","dependencies":[{"id":6166017081,"package_name":"streamlit","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false}]},{"ecosystem":"pypi","filepath":"pyproject.toml","sha":null,"kind":"manifest","created_at":"2022-09-26T21:51:29.042Z","updated_at":"2022-09-26T21:51:29.042Z","repository_link":"https://github.com/aphp/edspdf/blob/main/pyproject.toml","dependencies":[{"id":6166017469,"package_name":"python","ecosystem":"pypi","requirements":"\u003e=3.7.1,!=3.7.6,!=3.8.1,\u003c3.11","direct":true,"kind":"runtime","optional":false},{"id":6166017470,"package_name":"loguru","ecosystem":"pypi","requirements":"^0.6.0","direct":true,"kind":"runtime","optional":false},{"id":6166017471,"package_name":"pandas","ecosystem":"pypi","requirements":"^1.2","direct":true,"kind":"runtime","optional":false},{"id":6166017472,"package_name":"scikit-learn","ecosystem":"pypi","requirements":"^1.0.2","direct":true,"kind":"runtime","optional":false},{"id":6166017473,"package_name":"pdfminer.six","ecosystem":"pypi","requirements":"^20220319","direct":true,"kind":"runtime","optional":false},{"id":6166017474,"package_name":"pydantic","ecosystem":"pypi","requirements":"^1.2","direct":true,"kind":"runtime","optional":false},{"id":6166017475,"package_name":"catalogue","ecosystem":"pypi","requirements":"^2.0.7","direct":true,"kind":"runtime","optional":false},{"id":6166017476,"package_name":"thinc","ecosystem":"pypi","requirements":"^8.0.15","direct":true,"kind":"runtime","optional":false},{"id":6166017477,"package_name":"scipy","ecosystem":"pypi","requirements":"^1.7.0","direct":true,"kind":"runtime","optional":false},{"id":6166017478,"package_name":"networkx","ecosystem":"pypi","requirements":"^2.6","direct":true,"kind":"runtime","optional":false},{"id":6166017479,"package_name":"pypdfium2","ecosystem":"pypi","requirements":"^2.7.1","direct":true,"kind":"runtime","optional":false},{"id":6166017480,"package_name":"black","ecosystem":"pypi","requirements":"22.6.0","direct":true,"kind":"develop","optional":false},{"id":6166017481,"package_name":"flake8","ecosystem":"pypi","requirements":"\u003e=3.0","direct":true,"kind":"develop","optional":false},{"id":6166017482,"package_name":"pre-commit","ecosystem":"pypi","requirements":"^2.18.1","direct":true,"kind":"develop","optional":false},{"id":6166017483,"package_name":"pytest","ecosystem":"pypi","requirements":"^7.1.1","direct":true,"kind":"develop","optional":false},{"id":6166017484,"package_name":"pytest-cov","ecosystem":"pypi","requirements":"^3.0.0","direct":true,"kind":"develop","optional":false},{"id":6166017485,"package_name":"mike","ecosystem":"pypi","requirements":"^1.1.2","direct":true,"kind":"develop","optional":false},{"id":6166017486,"package_name":"mkdocs-bibtex","ecosystem":"pypi","requirements":"^2.0.3","direct":true,"kind":"develop","optional":false},{"id":6166017487,"package_name":"mkdocs-gen-files","ecosystem":"pypi","requirements":"^0.3.4","direct":true,"kind":"develop","optional":false},{"id":6166017488,"package_name":"mkdocs-literate-nav","ecosystem":"pypi","requirements":"^0.4.1","direct":true,"kind":"develop","optional":false},{"id":6166017489,"package_name":"mkdocs-material","ecosystem":"pypi","requirements":"^8.2.8","direct":true,"kind":"develop","optional":false},{"id":6166017490,"package_name":"mkdocstrings","ecosystem":"pypi","requirements":"^0.18.1","direct":true,"kind":"develop","optional":false},{"id":6166017491,"package_name":"mkdocstrings-python","ecosystem":"pypi","requirements":"^0.6.6","direct":true,"kind":"develop","optional":false},{"id":6166017492,"package_name":"streamlit","ecosystem":"pypi","requirements":"^1.8.1","direct":true,"kind":"develop","optional":false},{"id":6166017493,"package_name":"mypy","ecosystem":"pypi","requirements":"^0.950","direct":true,"kind":"develop","optional":false},{"id":6166017494,"package_name":"mkdocs-glightbox","ecosystem":"pypi","requirements":"^0.1.6","direct":true,"kind":"develop","optional":false},{"id":6166017495,"package_name":"mkdocs-autorefs","ecosystem":"pypi","requirements":"^0.4.1","direct":true,"kind":"develop","optional":false}]},{"ecosystem":"actions","filepath":".github/workflows/documentation.yml","sha":null,"kind":"manifest","created_at":"2023-01-29T13:00:50.070Z","updated_at":"2023-01-29T13:00:50.070Z","repository_link":"https://github.com/aphp/edspdf/blob/main/.github/workflows/documentation.yml","dependencies":[{"id":7225262447,"package_name":"actions/checkout","ecosystem":"actions","requirements":"v2","direct":true,"kind":"composite","optional":false},{"id":7225262448,"package_name":"actions/setup-python","ecosystem":"actions","requirements":"v2","direct":true,"kind":"composite","optional":false},{"id":7225262449,"package_name":"actions/cache","ecosystem":"actions","requirements":"v2","direct":true,"kind":"composite","optional":false}]},{"ecosystem":"actions","filepath":".github/workflows/release.yml","sha":null,"kind":"manifest","created_at":"2023-01-29T13:00:50.096Z","updated_at":"2023-01-29T13:00:50.096Z","repository_link":"https://github.com/aphp/edspdf/blob/main/.github/workflows/release.yml","dependencies":[{"id":7225262651,"package_name":"actions/checkout","ecosystem":"actions","requirements":"v2","direct":true,"kind":"composite","optional":false},{"id":7225262652,"package_name":"actions/setup-python","ecosystem":"actions","requirements":"v2","direct":true,"kind":"composite","optional":false},{"id":7225262653,"package_name":"actions/cache","ecosystem":"actions","requirements":"v2","direct":true,"kind":"composite","optional":false}]},{"ecosystem":"actions","filepath":".github/workflows/tests.yml","sha":null,"kind":"manifest","created_at":"2023-01-29T13:00:50.120Z","updated_at":"2023-01-29T13:00:50.120Z","repository_link":"https://github.com/aphp/edspdf/blob/main/.github/workflows/tests.yml","dependencies":[{"id":7225262668,"package_name":"actions/checkout","ecosystem":"actions","requirements":"v2","direct":true,"kind":"composite","optional":false},{"id":7225262669,"package_name":"actions/setup-python","ecosystem":"actions","requirements":"v2","direct":true,"kind":"composite","optional":false},{"id":7225262670,"package_name":"actions/cache","ecosystem":"actions","requirements":"v2","direct":true,"kind":"composite","optional":false},{"id":7225262671,"package_name":"codecov/codecov-action","ecosystem":"actions","requirements":"v2","direct":true,"kind":"composite","optional":false}]}],"score":5.723585101952381,"created_at":"2025-09-04T15:51:03.946Z","updated_at":"2025-10-07T08:13:20.075Z","avatar_url":"https://github.com/aphp.png","language":"Python","category":null,"sub_category":null,"monthly_downloads":0,"funding_links":[],"readme_doi_urls":[],"works":{},"citation_counts":{},"total_citations":0,"keywords_from_contributors":[],"project_url":"https://science.ecosyste.ms/api/v1/projects/31111","html_url":"https://science.ecosyste.ms/projects/31111","bibtex_url":"https://science.ecosyste.ms/projects/31111/export.bibtex","apalike_url":"https://science.ecosyste.ms/projects/31111/export.apalike"}