{"id":20367,"name":"wpextract","description":"Create datasets from WordPress sites for research or archiving","url":"https://github.com/gatenlp/wpextract","last_synced_at":"2025-09-05T00:35:44.069Z","repository":{"id":202629659,"uuid":"573084559","full_name":"GateNLP/wpextract","owner":"GateNLP","description":"Create datasets from WordPress sites for research or archiving","archived":false,"fork":false,"pushed_at":"2025-03-31T14:19:59.000Z","size":2033,"stargazers_count":4,"open_issues_count":4,"forks_count":0,"subscribers_count":3,"default_branch":"main","last_synced_at":"2025-09-01T03:58:02.478Z","etag":null,"topics":["corpus","crawler","nlp","text-extraction","text-mining","web-scraping","wordpress"],"latest_commit_sha":null,"homepage":"https://wpextract.readthedocs.io","language":"Python","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"apache-2.0","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/GateNLP.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":".github/CONTRIBUTING.md","funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":"CITATION.cff","codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null,"zenodo":null}},"created_at":"2022-12-01T17:01:26.000Z","updated_at":"2025-05-11T04:29:22.000Z","dependencies_parsed_at":"2025-04-12T23:21:57.550Z","dependency_job_id":"110601e1-bf63-46e8-b7c2-51fcc62c5731","html_url":"https://github.com/GateNLP/wpextract","commit_stats":null,"previous_names":["gatenlp/wordpress-site-extractor","gatenlp/wpextract"],"tags_count":8,"template":false,"template_full_name":null,"purl":"pkg:github/GateNLP/wpextract","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GateNLP%2Fwpextract","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GateNLP%2Fwpextract/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GateNLP%2Fwpextract/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GateNLP%2Fwpextract/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/GateNLP","download_url":"https://codeload.github.com/GateNLP/wpextract/tar.gz/refs/heads/main","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GateNLP%2Fwpextract/sbom","scorecard":null,"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":273695031,"owners_count":25151480,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","status":"online","status_checked_at":"2025-09-04T02:00:08.968Z","response_time":61,"last_error":null,"robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":true,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"owner":{"login":"GateNLP","name":"GateNLP","uuid":"14233746","kind":"organization","description":"GATE - General Architecture for Text Engineering","email":null,"website":"https://gate.ac.uk/","location":"Sheffield, UK","twitter":"gateAcUk","company":null,"icon_url":"https://avatars.githubusercontent.com/u/14233746?v=4","repositories_count":170,"last_synced_at":"2024-04-16T05:59:14.383Z","metadata":{"has_sponsors_listing":false},"html_url":"https://github.com/GateNLP","funding_links":[],"total_stars":408,"followers":38,"following":0,"created_at":"2022-11-03T15:50:30.963Z","updated_at":"2024-04-16T05:59:37.855Z","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/GateNLP","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/GateNLP/repositories"},"packages":[{"id":10599181,"name":"wpextract","ecosystem":"pypi","description":"Create datasets from WordPress sites","homepage":"https://wpextract.readthedocs.io/","licenses":"Apache-2.0","normalized_licenses":["Apache-2.0"],"repository_url":"https://github.com/GateNLP/wpextract","keywords_array":[],"namespace":null,"versions_count":9,"first_release_published_at":"2024-07-09T17:39:20.000Z","latest_release_published_at":"2025-01-20T15:44:29.000Z","latest_release_number":"1.1.1","last_synced_at":"2025-08-27T18:18:43.377Z","created_at":"2024-07-09T17:46:52.659Z","updated_at":"2025-08-27T19:28:52.938Z","registry_url":"https://pypi.org/project/wpextract/","install_command":"pip install wpextract --index-url https://pypi.org/simple","documentation_url":"https://wpextract.readthedocs.io/","metadata":{"funding":null,"documentation":"https://wpextract.readthedocs.io/","classifiers":["Development Status :: 5 - Production/Stable","Environment :: Console","Intended Audience :: Developers","Intended Audience :: Science/Research","License :: OSI Approved :: Apache Software License","Operating System :: OS Independent","Programming Language :: Python :: 3","Programming Language :: Python :: 3.10","Programming Language :: Python :: 3.11","Programming Language :: Python :: 3.12","Programming Language :: Python :: 3.13","Programming Language :: Python :: 3.9","Topic :: Software Development :: Libraries :: Python Modules"],"normalized_name":"wpextract","project_status":null},"repo_metadata":{"id":202629659,"uuid":"573084559","full_name":"GateNLP/wpextract","owner":"GateNLP","description":"Create datasets from WordPress sites for research or archiving","archived":false,"fork":false,"pushed_at":"2025-03-31T14:19:59.000Z","size":2033,"stargazers_count":4,"open_issues_count":4,"forks_count":0,"subscribers_count":3,"default_branch":"main","last_synced_at":"2025-08-23T19:49:45.843Z","etag":null,"topics":["corpus","crawler","nlp","text-extraction","text-mining","web-scraping","wordpress"],"latest_commit_sha":null,"homepage":"https://wpextract.readthedocs.io","language":"Python","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"apache-2.0","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/GateNLP.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":".github/CONTRIBUTING.md","funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":"CITATION.cff","codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null,"zenodo":null}},"created_at":"2022-12-01T17:01:26.000Z","updated_at":"2025-05-11T04:29:22.000Z","dependencies_parsed_at":"2025-04-12T23:21:57.550Z","dependency_job_id":"110601e1-bf63-46e8-b7c2-51fcc62c5731","html_url":"https://github.com/GateNLP/wpextract","commit_stats":null,"previous_names":["gatenlp/wordpress-site-extractor","gatenlp/wpextract"],"tags_count":8,"template":false,"template_full_name":null,"purl":"pkg:github/GateNLP/wpextract","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GateNLP%2Fwpextract","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GateNLP%2Fwpextract/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GateNLP%2Fwpextract/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GateNLP%2Fwpextract/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/GateNLP","download_url":"https://codeload.github.com/GateNLP/wpextract/tar.gz/refs/heads/main","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GateNLP%2Fwpextract/sbom","scorecard":null,"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":272367405,"owners_count":24922228,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","status":"online","status_checked_at":"2025-08-27T02:00:09.397Z","response_time":76,"last_error":null,"robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":true,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"},"owner_record":{"login":"GateNLP","name":"GateNLP","uuid":"14233746","kind":"organization","description":"GATE - General Architecture for Text Engineering","email":null,"website":"https://gate.ac.uk/","location":"Sheffield, UK","twitter":"gateAcUk","company":null,"icon_url":"https://avatars.githubusercontent.com/u/14233746?v=4","repositories_count":170,"last_synced_at":"2024-04-16T05:59:14.383Z","metadata":{"has_sponsors_listing":false},"html_url":"https://github.com/GateNLP","funding_links":[],"total_stars":408,"followers":38,"following":0,"created_at":"2022-11-03T15:50:30.963Z","updated_at":"2024-04-16T05:59:37.855Z","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/GateNLP","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/GateNLP/repositories"},"tags":[{"name":"1.1.1","sha":"d4b0cd1d6a50afd4a661ef1aead80cc96792f1a4","kind":"tag","published_at":"2025-01-20T15:42:31.000Z","download_url":"https://codeload.github.com/GateNLP/wpextract/tar.gz/1.1.1","html_url":"https://github.com/GateNLP/wpextract/releases/tag/1.1.1","dependencies_parsed_at":null,"dependency_job_id":null,"purl":"pkg:github/GateNLP/wpextract@1.1.1","tag_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GateNLP%2Fwpextract/tags/1.1.1","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GateNLP%2Fwpextract/tags/1.1.1/manifests"},{"name":"1.1.0","sha":"8328435ee19a2c49d4cd002d090e63c43a68563e","kind":"tag","published_at":"2024-08-13T14:18:15.000Z","download_url":"https://codeload.github.com/GateNLP/wpextract/tar.gz/1.1.0","html_url":"https://github.com/GateNLP/wpextract/releases/tag/1.1.0","dependencies_parsed_at":null,"dependency_job_id":null,"purl":"pkg:github/GateNLP/wpextract@1.1.0","tag_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GateNLP%2Fwpextract/tags/1.1.0","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GateNLP%2Fwpextract/tags/1.1.0/manifests"},{"name":"1.0.3","sha":"aa8f449cd8a0b1d193632adc9b82c1cd71d7b32e","kind":"tag","published_at":"2024-08-06T16:00:41.000Z","download_url":"https://codeload.github.com/GateNLP/wpextract/tar.gz/1.0.3","html_url":"https://github.com/GateNLP/wpextract/releases/tag/1.0.3","dependencies_parsed_at":null,"dependency_job_id":null,"purl":"pkg:github/GateNLP/wpextract@1.0.3","tag_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GateNLP%2Fwpextract/tags/1.0.3","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GateNLP%2Fwpextract/tags/1.0.3/manifests"},{"name":"1.0.2","sha":"dd27d5ad8d1626f793c53c669393ca578a35214c","kind":"tag","published_at":"2024-07-12T11:19:32.000Z","download_url":"https://codeload.github.com/GateNLP/wpextract/tar.gz/1.0.2","html_url":"https://github.com/GateNLP/wpextract/releases/tag/1.0.2","dependencies_parsed_at":null,"dependency_job_id":null,"purl":"pkg:github/GateNLP/wpextract@1.0.2","tag_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GateNLP%2Fwpextract/tags/1.0.2","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GateNLP%2Fwpextract/tags/1.0.2/manifests"},{"name":"1.0.1post1","sha":"b14c61f6748717275cd2663d9626464911df93c7","kind":"tag","published_at":"2024-07-11T21:39:03.000Z","download_url":"https://codeload.github.com/GateNLP/wpextract/tar.gz/1.0.1post1","html_url":"https://github.com/GateNLP/wpextract/releases/tag/1.0.1post1","dependencies_parsed_at":null,"dependency_job_id":null,"purl":"pkg:github/GateNLP/wpextract@1.0.1post1","tag_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GateNLP%2Fwpextract/tags/1.0.1post1","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GateNLP%2Fwpextract/tags/1.0.1post1/manifests"},{"name":"1.0.1","sha":"86c92d9ccfd240feede3a9558a4b79732d433a59","kind":"tag","published_at":"2024-07-11T16:39:24.000Z","download_url":"https://codeload.github.com/GateNLP/wpextract/tar.gz/1.0.1","html_url":"https://github.com/GateNLP/wpextract/releases/tag/1.0.1","dependencies_parsed_at":null,"dependency_job_id":null,"purl":"pkg:github/GateNLP/wpextract@1.0.1","tag_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GateNLP%2Fwpextract/tags/1.0.1","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GateNLP%2Fwpextract/tags/1.0.1/manifests"},{"name":"1.0.0","sha":"8492067bf15a8ed0311633dfd20886a908a67cdb","kind":"tag","published_at":"2024-07-11T16:14:59.000Z","download_url":"https://codeload.github.com/GateNLP/wpextract/tar.gz/1.0.0","html_url":"https://github.com/GateNLP/wpextract/releases/tag/1.0.0","dependencies_parsed_at":null,"dependency_job_id":null,"purl":"pkg:github/GateNLP/wpextract@1.0.0","tag_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GateNLP%2Fwpextract/tags/1.0.0","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GateNLP%2Fwpextract/tags/1.0.0/manifests"},{"name":"1.0.0rc1","sha":"df1b997db1a0943692a979f39287c81ab108173c","kind":"tag","published_at":"2024-07-11T15:54:09.000Z","download_url":"https://codeload.github.com/GateNLP/wpextract/tar.gz/1.0.0rc1","html_url":"https://github.com/GateNLP/wpextract/releases/tag/1.0.0rc1","dependencies_parsed_at":null,"dependency_job_id":null,"purl":"pkg:github/GateNLP/wpextract@1.0.0rc1","tag_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GateNLP%2Fwpextract/tags/1.0.0rc1","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/GateNLP%2Fwpextract/tags/1.0.0rc1/manifests"}]},"repo_metadata_updated_at":"2025-08-27T19:28:52.938Z","dependent_packages_count":0,"downloads":26,"downloads_period":"last-month","dependent_repos_count":0,"rankings":{"downloads":null,"dependent_repos_count":60.046570225783654,"dependent_packages_count":10.662669730377512,"stargazers_count":null,"forks_count":null,"docker_downloads_count":null,"average":35.354619978080585},"purl":"pkg:pypi/wpextract","advisories":[],"docker_usage_url":"https://docker.ecosyste.ms/usage/pypi/wpextract","docker_dependents_count":null,"docker_downloads_count":null,"usage_url":"https://repos.ecosyste.ms/usage/pypi/wpextract","dependent_repositories_url":"https://repos.ecosyste.ms/api/v1/usage/pypi/wpextract/dependencies","status":null,"funding_links":[],"critical":null,"issue_metadata":{"last_synced_at":"2025-06-25T18:48:49.917Z","issues_count":15,"pull_requests_count":59,"avg_time_to_close_issue":534309.6666666666,"avg_time_to_close_pull_request":126962.49152542373,"issues_closed_count":9,"pull_requests_closed_count":59,"pull_request_authors_count":2,"issue_authors_count":3,"avg_comments_per_issue":0.13333333333333333,"avg_comments_per_pull_request":0.0,"merged_pull_requests_count":58,"bot_issues_count":0,"bot_pull_requests_count":0,"past_year_issues_count":15,"past_year_pull_requests_count":57,"past_year_avg_time_to_close_issue":534309.6666666666,"past_year_avg_time_to_close_pull_request":131408.54385964913,"past_year_issues_closed_count":9,"past_year_pull_requests_closed_count":57,"past_year_pull_request_authors_count":2,"past_year_issue_authors_count":3,"past_year_avg_comments_per_issue":0.13333333333333333,"past_year_avg_comments_per_pull_request":0.0,"past_year_bot_issues_count":0,"past_year_bot_pull_requests_count":0,"past_year_merged_pull_requests_count":56,"issues_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/repositories/gatenlp%2Fwpextract/issues","maintainers":[{"login":"freddyheppell","count":52,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/freddyheppell"},{"login":"ianroberts","count":1,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/ianroberts"}],"active_maintainers":[{"login":"freddyheppell","count":4,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/freddyheppell"},{"login":"ianroberts","count":1,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/ianroberts"}]},"versions_url":"https://packages.ecosyste.ms/api/v1/registries/pypi.org/packages/wpextract/versions","version_numbers_url":"https://packages.ecosyste.ms/api/v1/registries/pypi.org/packages/wpextract/version_numbers","dependent_packages_url":"https://packages.ecosyste.ms/api/v1/registries/pypi.org/packages/wpextract/dependent_packages","related_packages_url":"https://packages.ecosyste.ms/api/v1/registries/pypi.org/packages/wpextract/related_packages","maintainers":[{"uuid":"freddyheppell","login":"freddyheppell","name":null,"email":null,"url":null,"packages_count":4,"html_url":"https://pypi.org/user/freddyheppell/","role":"Owner","created_at":"2024-07-09T17:46:53.936Z","updated_at":"2024-07-09T17:46:53.936Z","packages_url":"https://packages.ecosyste.ms/api/v1/registries/pypi.org/maintainers/freddyheppell/packages"}],"registry":{"name":"pypi.org","url":"https://pypi.org","ecosystem":"pypi","default":true,"packages_count":724943,"maintainers_count":308264,"namespaces_count":0,"keywords_count":237984,"github":"pypi","metadata":{"funded_packages_count":50481},"icon_url":"https://github.com/pypi.png","created_at":"2022-04-04T15:19:23.364Z","updated_at":"2025-09-04T05:23:51.177Z","packages_url":"https://packages.ecosyste.ms/api/v1/registries/pypi.org/packages","maintainers_url":"https://packages.ecosyste.ms/api/v1/registries/pypi.org/maintainers","namespaces_url":"https://packages.ecosyste.ms/api/v1/registries/pypi.org/namespaces"}}],"commits":{"id":5542096,"full_name":"gatenlp/wpextract","default_branch":"main","committers":[{"name":"Freddy Heppell","email":"freddy@freddyheppell.com","login":"freddyheppell","count":88},{"name":"Ian Roberts","email":"i.roberts@sheffield.ac.uk","login":"ianroberts","count":1}],"total_commits":89,"total_committers":2,"total_bot_commits":0,"total_bot_committers":0,"mean_commits":44.5,"dds":0.011235955056179803,"past_year_committers":[{"name":"Freddy Heppell","email":"freddy@freddyheppell.com","login":"freddyheppell","count":26},{"name":"Ian Roberts","email":"i.roberts@sheffield.ac.uk","login":"ianroberts","count":1}],"past_year_total_commits":27,"past_year_total_committers":2,"past_year_total_bot_commits":0,"past_year_total_bot_committers":0,"past_year_mean_commits":13.5,"past_year_dds":0.03703703703703709,"last_synced_at":"2025-06-25T20:49:32.234Z","last_synced_commit":"d4b0cd1d6a50afd4a661ef1aead80cc96792f1a4","created_at":"2024-11-13T18:31:15.262Z","updated_at":"2025-06-25T20:49:32.234Z","commits_url":"https://commits.ecosyste.ms/api/v1/hosts/GitHub/repositories/gatenlp%2Fwpextract/commits","host":{"name":"GitHub","url":"https://github.com","kind":"github","last_synced_at":"2025-09-05T00:00:10.343Z","repositories_count":5480019,"commits_count":853389012,"contributors_count":31098138,"owners_count":906558,"icon_url":"https://github.com/github.png","host_url":"https://commits.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://commits.ecosyste.ms/api/v1/hosts/GitHub/repositories"}},"issues_stats":{"full_name":"gatenlp/wpextract","html_url":"https://github.com/gatenlp/wpextract","last_synced_at":"2025-08-31T10:45:48.479Z","status":null,"issues_count":12,"pull_requests_count":43,"avg_time_to_close_issue":284422.85714285716,"avg_time_to_close_pull_request":298439.72093023255,"issues_closed_count":7,"pull_requests_closed_count":43,"pull_request_authors_count":2,"issue_authors_count":3,"avg_comments_per_issue":0.08333333333333333,"avg_comments_per_pull_request":0.0,"merged_pull_requests_count":42,"bot_issues_count":0,"bot_pull_requests_count":0,"past_year_issues_count":3,"past_year_pull_requests_count":8,"past_year_avg_time_to_close_issue":138931.0,"past_year_avg_time_to_close_pull_request":1467837.0,"past_year_issues_closed_count":1,"past_year_pull_requests_closed_count":8,"past_year_pull_request_authors_count":2,"past_year_issue_authors_count":3,"past_year_avg_comments_per_issue":0.3333333333333333,"past_year_avg_comments_per_pull_request":0.0,"past_year_bot_issues_count":0,"past_year_bot_pull_requests_count":0,"past_year_merged_pull_requests_count":8,"created_at":"2024-11-13T18:31:15.993Z","updated_at":"2025-09-02T17:10:09.856Z","repository_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/repositories/gatenlp%2Fwpextract","issues_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/repositories/gatenlp%2Fwpextract/issues","issue_labels_count":{"documentation":2},"pull_request_labels_count":{},"issue_author_associations_count":{"MEMBER":3,"NONE":2},"pull_request_author_associations_count":{"MEMBER":58},"issue_authors":{"freddyheppell":3,"ducnguyenphanhoai":1,"ducnguyen04071996":1},"pull_request_authors":{"freddyheppell":56,"ianroberts":2},"host":{"name":"GitHub","url":"https://github.com","kind":"github","last_synced_at":"2025-09-02T00:00:07.986Z","repositories_count":10046970,"issues_count":30703881,"pull_requests_count":91794904,"authors_count":10467743,"icon_url":"https://github.com/github.png","host_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/repositories","owners_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/owners","authors_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors"},"past_year_issue_labels_count":{"documentation":1},"past_year_pull_request_labels_count":{},"past_year_issue_author_associations_count":{"NONE":2,"MEMBER":1},"past_year_pull_request_author_associations_count":{"MEMBER":8},"past_year_issue_authors":{"ducnguyen04071996":1,"ducnguyenphanhoai":1,"freddyheppell":1},"past_year_pull_request_authors":{"freddyheppell":6,"ianroberts":2},"maintainers":[{"login":"freddyheppell","count":59,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/freddyheppell"},{"login":"ianroberts","count":2,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/ianroberts"}],"active_maintainers":[{"login":"freddyheppell","count":7,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/freddyheppell"},{"login":"ianroberts","count":2,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/ianroberts"}]},"events":{"total":{"CreateEvent":6,"IssuesEvent":4,"ReleaseEvent":1,"WatchEvent":2,"DeleteEvent":3,"IssueCommentEvent":2,"PushEvent":4,"PullRequestEvent":8},"last_year":{"CreateEvent":6,"IssuesEvent":4,"ReleaseEvent":1,"WatchEvent":2,"DeleteEvent":3,"IssueCommentEvent":2,"PushEvent":4,"PullRequestEvent":8}},"keywords":["corpus","crawler","nlp","text-extraction","text-mining","web-scraping","wordpress"],"dependencies":[{"ecosystem":"actions","filepath":".github/workflows/lint.yml","sha":null,"kind":"manifest","created_at":"2023-10-20T17:38:40.325Z","updated_at":"2023-10-20T17:38:40.325Z","repository_link":"https://github.com/GateNLP/wpextract/blob/main/.github/workflows/lint.yml","dependencies":[{"id":14266699455,"package_name":"actions/checkout","ecosystem":"actions","requirements":"v3","direct":true,"kind":"composite","optional":false},{"id":14266700111,"package_name":"actions/setup-python","ecosystem":"actions","requirements":"v3","direct":true,"kind":"composite","optional":false}]},{"ecosystem":"actions","filepath":".github/workflows/test.yml","sha":null,"kind":"manifest","created_at":"2023-10-20T17:38:41.243Z","updated_at":"2023-10-20T17:38:41.243Z","repository_link":"https://github.com/GateNLP/wpextract/blob/main/.github/workflows/test.yml","dependencies":[{"id":14266700315,"package_name":"actions/checkout","ecosystem":"actions","requirements":"v3","direct":true,"kind":"composite","optional":false},{"id":14266700316,"package_name":"actions/setup-python","ecosystem":"actions","requirements":"v3","direct":true,"kind":"composite","optional":false},{"id":14266700317,"package_name":"actions/setup-node","ecosystem":"actions","requirements":"v3","direct":true,"kind":"composite","optional":false}]},{"ecosystem":"pypi","filepath":"pyproject.toml","sha":null,"kind":"manifest","created_at":"2023-10-20T17:38:50.389Z","updated_at":"2023-10-20T17:38:50.389Z","repository_link":"https://github.com/GateNLP/wpextract/blob/main/pyproject.toml","dependencies":[]},{"ecosystem":"docker","filepath":"tests/e2e/tools/docker-compose.yml","sha":null,"kind":"manifest","created_at":"2024-11-13T18:44:45.057Z","updated_at":"2024-11-13T18:44:45.057Z","repository_link":"https://github.com/GateNLP/wpextract/blob/main/tests/e2e/tools/docker-compose.yml","dependencies":[{"id":20716454742,"package_name":"mariadb","ecosystem":"docker","requirements":"10.6.4-focal","direct":true,"kind":"runtime","optional":false},{"id":20716454743,"package_name":"wordpress","ecosystem":"docker","requirements":"6.6","direct":true,"kind":"runtime","optional":false}]},{"ecosystem":"actions","filepath":".github/workflows/publish.yml","sha":null,"kind":"manifest","created_at":"2024-07-11T16:48:26.231Z","updated_at":"2024-07-11T16:48:26.231Z","repository_link":"https://github.com/GateNLP/wpextract/blob/main/.github/workflows/publish.yml","dependencies":[{"id":19046533190,"package_name":"actions/checkout","ecosystem":"actions","requirements":"v4","direct":true,"kind":"composite","optional":false},{"id":19046533191,"package_name":"actions/setup-python","ecosystem":"actions","requirements":"v5","direct":true,"kind":"composite","optional":false},{"id":19046533192,"package_name":"actions/upload-artifact","ecosystem":"actions","requirements":"v3","direct":true,"kind":"composite","optional":false},{"id":19046533193,"package_name":"actions/download-artifact","ecosystem":"actions","requirements":"v3","direct":true,"kind":"composite","optional":false},{"id":19046533194,"package_name":"pypa/gh-action-pypi-publish","ecosystem":"actions","requirements":"release/v1","direct":true,"kind":"composite","optional":false},{"id":19046533195,"package_name":"sigstore/gh-action-sigstore-python","ecosystem":"actions","requirements":"v2.1.1","direct":true,"kind":"composite","optional":false}]},{"ecosystem":"pypi","filepath":"poetry.lock","sha":null,"kind":"lockfile","created_at":"2024-07-11T16:48:26.349Z","updated_at":"2024-07-11T16:48:26.349Z","repository_link":"https://github.com/GateNLP/wpextract/blob/main/poetry.lock","dependencies":[{"id":19046533196,"package_name":"babel","ecosystem":"pypi","requirements":"2.15.0","direct":false,"kind":"runtime","optional":false},{"id":19046533197,"package_name":"beautifulsoup4","ecosystem":"pypi","requirements":"4.12.3","direct":false,"kind":"runtime","optional":false},{"id":19046533198,"package_name":"black","ecosystem":"pypi","requirements":"24.4.2","direct":false,"kind":"runtime","optional":false},{"id":19046533199,"package_name":"build","ecosystem":"pypi","requirements":"0.9.0","direct":false,"kind":"runtime","optional":false},{"id":19046533200,"package_name":"certifi","ecosystem":"pypi","requirements":"2024.7.4","direct":false,"kind":"runtime","optional":false},{"id":19046533201,"package_name":"charset-normalizer","ecosystem":"pypi","requirements":"3.3.2","direct":false,"kind":"runtime","optional":false},{"id":19046533202,"package_name":"click","ecosystem":"pypi","requirements":"8.1.7","direct":false,"kind":"runtime","optional":false},{"id":19046533203,"package_name":"click-option-group","ecosystem":"pypi","requirements":"0.5.6","direct":false,"kind":"runtime","optional":false},{"id":19046533204,"package_name":"colorama","ecosystem":"pypi","requirements":"0.4.6","direct":false,"kind":"runtime","optional":false},{"id":19046533205,"package_name":"coverage","ecosystem":"pypi","requirements":"7.5.4","direct":false,"kind":"runtime","optional":false},{"id":19046533206,"package_name":"exceptiongroup","ecosystem":"pypi","requirements":"1.2.1","direct":false,"kind":"runtime","optional":false},{"id":19046533207,"package_name":"ghp-import","ecosystem":"pypi","requirements":"2.1.0","direct":false,"kind":"runtime","optional":false},{"id":19046533208,"package_name":"griffe","ecosystem":"pypi","requirements":"0.47.0","direct":false,"kind":"runtime","optional":false},{"id":19046533209,"package_name":"idna","ecosystem":"pypi","requirements":"3.7","direct":false,"kind":"runtime","optional":false},{"id":19046533210,"package_name":"importlib-metadata","ecosystem":"pypi","requirements":"8.0.0","direct":false,"kind":"runtime","optional":false},{"id":19046533211,"package_name":"iniconfig","ecosystem":"pypi","requirements":"2.0.0","direct":false,"kind":"runtime","optional":false},{"id":19046533212,"package_name":"jinja2","ecosystem":"pypi","requirements":"3.1.4","direct":false,"kind":"runtime","optional":false},{"id":19046533213,"package_name":"langcodes","ecosystem":"pypi","requirements":"3.4.0","direct":false,"kind":"runtime","optional":false},{"id":19046533214,"package_name":"language-data","ecosystem":"pypi","requirements":"1.2.0","direct":false,"kind":"runtime","optional":false},{"id":19046533215,"package_name":"lxml","ecosystem":"pypi","requirements":"5.2.2","direct":false,"kind":"runtime","optional":false},{"id":19046533216,"package_name":"marisa-trie","ecosystem":"pypi","requirements":"1.2.0","direct":false,"kind":"runtime","optional":false},{"id":19046533217,"package_name":"markdown","ecosystem":"pypi","requirements":"3.6","direct":false,"kind":"runtime","optional":false},{"id":19046533218,"package_name":"markupsafe","ecosystem":"pypi","requirements":"2.1.5","direct":false,"kind":"runtime","optional":false},{"id":19046533219,"package_name":"mergedeep","ecosystem":"pypi","requirements":"1.3.4","direct":false,"kind":"runtime","optional":false},{"id":19046533220,"package_name":"mkdocs","ecosystem":"pypi","requirements":"1.6.0","direct":false,"kind":"runtime","optional":false},{"id":19046533221,"package_name":"mkdocs-autorefs","ecosystem":"pypi","requirements":"1.0.1","direct":false,"kind":"runtime","optional":false},{"id":19046533222,"package_name":"mkdocs-get-deps","ecosystem":"pypi","requirements":"0.2.0","direct":false,"kind":"runtime","optional":false},{"id":19046533223,"package_name":"mkdocs-material","ecosystem":"pypi","requirements":"9.5.28","direct":false,"kind":"runtime","optional":false},{"id":19046533224,"package_name":"mkdocs-material-extensions","ecosystem":"pypi","requirements":"1.3.1","direct":false,"kind":"runtime","optional":false},{"id":19046533225,"package_name":"mkdocstrings","ecosystem":"pypi","requirements":"0.25.1","direct":false,"kind":"runtime","optional":false},{"id":19046533226,"package_name":"mkdocstrings-python","ecosystem":"pypi","requirements":"1.10.5","direct":false,"kind":"runtime","optional":false},{"id":19046533227,"package_name":"mypy-extensions","ecosystem":"pypi","requirements":"1.0.0","direct":false,"kind":"runtime","optional":false},{"id":19046533228,"package_name":"numpy","ecosystem":"pypi","requirements":"2.0.0","direct":false,"kind":"runtime","optional":false},{"id":19046533229,"package_name":"packaging","ecosystem":"pypi","requirements":"24.1","direct":false,"kind":"runtime","optional":false},{"id":19046533230,"package_name":"paginate","ecosystem":"pypi","requirements":"0.5.6","direct":false,"kind":"runtime","optional":false},{"id":19046533231,"package_name":"pandas","ecosystem":"pypi","requirements":"2.2.2","direct":false,"kind":"runtime","optional":false},{"id":19046533232,"package_name":"pathspec","ecosystem":"pypi","requirements":"0.12.1","direct":false,"kind":"runtime","optional":false},{"id":19046533233,"package_name":"pep517","ecosystem":"pypi","requirements":"0.13.1","direct":false,"kind":"runtime","optional":false},{"id":19046533234,"package_name":"platformdirs","ecosystem":"pypi","requirements":"4.2.2","direct":false,"kind":"runtime","optional":false},{"id":19046533235,"package_name":"pluggy","ecosystem":"pypi","requirements":"1.5.0","direct":false,"kind":"runtime","optional":false},{"id":19046533236,"package_name":"pygments","ecosystem":"pypi","requirements":"2.18.0","direct":false,"kind":"runtime","optional":false},{"id":19046533237,"package_name":"pymdown-extensions","ecosystem":"pypi","requirements":"10.8.1","direct":false,"kind":"runtime","optional":false},{"id":19046533238,"package_name":"pytest","ecosystem":"pypi","requirements":"8.2.2","direct":false,"kind":"runtime","optional":false},{"id":19046533239,"package_name":"pytest-datadir","ecosystem":"pypi","requirements":"1.5.0","direct":false,"kind":"runtime","optional":false},{"id":19046533240,"package_name":"pytest-mock","ecosystem":"pypi","requirements":"3.14.0","direct":false,"kind":"runtime","optional":false},{"id":19046533241,"package_name":"python-dateutil","ecosystem":"pypi","requirements":"2.9.0.post0","direct":false,"kind":"runtime","optional":false},{"id":19046533242,"package_name":"pytz","ecosystem":"pypi","requirements":"2024.1","direct":false,"kind":"runtime","optional":false},{"id":19046533243,"package_name":"pyyaml","ecosystem":"pypi","requirements":"6.0.1","direct":false,"kind":"runtime","optional":false},{"id":19046533244,"package_name":"pyyaml-env-tag","ecosystem":"pypi","requirements":"0.1","direct":false,"kind":"runtime","optional":false},{"id":19046533245,"package_name":"regex","ecosystem":"pypi","requirements":"2024.5.15","direct":false,"kind":"runtime","optional":false},{"id":19046533246,"package_name":"requests","ecosystem":"pypi","requirements":"2.32.3","direct":false,"kind":"runtime","optional":false},{"id":19046533247,"package_name":"responses","ecosystem":"pypi","requirements":"0.25.3","direct":false,"kind":"runtime","optional":false},{"id":19046533248,"package_name":"ruff","ecosystem":"pypi","requirements":"0.5.1","direct":false,"kind":"runtime","optional":false},{"id":19046533249,"package_name":"setuptools","ecosystem":"pypi","requirements":"70.3.0","direct":false,"kind":"runtime","optional":false},{"id":19046533250,"package_name":"six","ecosystem":"pypi","requirements":"1.16.0","direct":false,"kind":"runtime","optional":false},{"id":19046533251,"package_name":"soupsieve","ecosystem":"pypi","requirements":"2.5","direct":false,"kind":"runtime","optional":false},{"id":19046533252,"package_name":"tomli","ecosystem":"pypi","requirements":"2.0.1","direct":false,"kind":"runtime","optional":false},{"id":19046533253,"package_name":"tqdm","ecosystem":"pypi","requirements":"4.66.4","direct":false,"kind":"runtime","optional":false},{"id":19046533254,"package_name":"typing-extensions","ecosystem":"pypi","requirements":"4.12.2","direct":false,"kind":"runtime","optional":false},{"id":19046533255,"package_name":"tzdata","ecosystem":"pypi","requirements":"2024.1","direct":false,"kind":"runtime","optional":false},{"id":19046533256,"package_name":"urllib3","ecosystem":"pypi","requirements":"2.2.2","direct":false,"kind":"runtime","optional":false},{"id":19046533257,"package_name":"watchdog","ecosystem":"pypi","requirements":"4.0.1","direct":false,"kind":"runtime","optional":false},{"id":19046533258,"package_name":"zipp","ecosystem":"pypi","requirements":"3.19.2","direct":false,"kind":"runtime","optional":false}]}],"score":6.06842558824411,"created_at":"2025-09-04T15:50:51.768Z","updated_at":"2025-10-07T08:10:00.613Z","avatar_url":"https://github.com/GateNLP.png","language":"Python","category":null,"sub_category":null,"monthly_downloads":26,"funding_links":[],"readme_doi_urls":[],"works":{},"citation_counts":{},"total_citations":0,"keywords_from_contributors":[],"project_url":"https://science.ecosyste.ms/api/v1/projects/20367","html_url":"https://science.ecosyste.ms/projects/20367","bibtex_url":"https://science.ecosyste.ms/projects/20367/export.bibtex","apalike_url":"https://science.ecosyste.ms/projects/20367/export.apalike"}