{"id":9611,"name":"corpus_text_processor","description":"A desktop application for preparing files for use in a corpus","url":"https://github.com/writecrow/corpus_text_processor","last_synced_at":"2025-09-04T20:33:13.752Z","repository":{"id":37261880,"uuid":"196025743","full_name":"writecrow/corpus_text_processor","owner":"writecrow","description":"A desktop application for preparing files for use in a corpus","archived":false,"fork":false,"pushed_at":"2024-01-09T15:42:32.000Z","size":28705,"stargazers_count":8,"open_issues_count":4,"forks_count":4,"subscribers_count":5,"default_branch":"main","last_synced_at":"2025-08-24T23:32:40.074Z","etag":null,"topics":["corpora","corpus-linguistics","desktop-app","text-processing"],"latest_commit_sha":null,"homepage":null,"language":"Python","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"mit","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/writecrow.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":"CITATION.cff","codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null,"zenodo":null}},"created_at":"2019-07-09T14:32:53.000Z","updated_at":"2025-03-29T07:20:03.000Z","dependencies_parsed_at":"2025-04-11T16:33:09.788Z","dependency_job_id":"59acedb8-950d-4d17-8e6b-d3558d5c8358","html_url":"https://github.com/writecrow/corpus_text_processor","commit_stats":null,"previous_names":[],"tags_count":20,"template":false,"template_full_name":null,"purl":"pkg:github/writecrow/corpus_text_processor","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/writecrow%2Fcorpus_text_processor","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/writecrow%2Fcorpus_text_processor/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/writecrow%2Fcorpus_text_processor/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/writecrow%2Fcorpus_text_processor/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/writecrow","download_url":"https://codeload.github.com/writecrow/corpus_text_processor/tar.gz/refs/heads/main","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/writecrow%2Fcorpus_text_processor/sbom","scorecard":null,"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":273668942,"owners_count":25146859,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","status":"online","status_checked_at":"2025-09-04T02:00:08.968Z","response_time":61,"last_error":null,"robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":true,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"owner":{"login":"writecrow","name":"Corpus \u0026 Repository of Writing (Crow)","uuid":"27739911","kind":"organization","description":"Crow brings together researchers at Purdue, Arizona, and other universities to create a web-based archive for writing studies.","email":"collaborate@writecrow.org","website":"https://writecrow.org","location":"Purdue University | University of Arizona","twitter":"writecroworg","company":null,"icon_url":"https://avatars.githubusercontent.com/u/27739911?v=4","repositories_count":16,"last_synced_at":"2023-03-02T16:25:31.988Z","metadata":{"has_sponsors_listing":false},"html_url":"https://github.com/writecrow","funding_links":[],"total_stars":null,"followers":null,"following":null,"created_at":"2022-11-13T07:47:27.027Z","updated_at":"2023-03-02T16:25:31.996Z","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/writecrow","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/writecrow/repositories"},"packages":[],"commits":{"id":6872534,"full_name":"writecrow/corpus_text_processor","default_branch":"main","committers":[{"name":"Mark Fullmer","email":"mfullmer@gmail.com","login":"markfullmer","count":53},{"name":"jmf3658","email":"jfullmer@austin.utexas.edu","login":"jmf3658","count":13},{"name":"Shelley Staples","email":"slstaples@email.arizona.edu","login":"slstaples","count":1}],"total_commits":67,"total_committers":3,"total_bot_commits":0,"total_bot_committers":0,"mean_commits":22.333333333333332,"dds":0.20895522388059706,"past_year_committers":[],"past_year_total_commits":0,"past_year_total_committers":0,"past_year_total_bot_commits":0,"past_year_total_bot_committers":0,"past_year_mean_commits":0.0,"past_year_dds":0.0,"last_synced_at":"2025-07-22T10:09:19.229Z","last_synced_commit":"c0654f3709fdc074af7962b3b502a50d681c02fe","created_at":"2024-11-26T06:13:54.637Z","updated_at":"2025-07-22T10:09:19.229Z","commits_url":"https://commits.ecosyste.ms/api/v1/hosts/GitHub/repositories/writecrow%2Fcorpus_text_processor/commits","host":{"name":"GitHub","url":"https://github.com","kind":"github","last_synced_at":"2025-09-04T00:00:25.936Z","repositories_count":5480019,"commits_count":853389012,"contributors_count":31098138,"owners_count":906558,"icon_url":"https://github.com/github.png","host_url":"https://commits.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://commits.ecosyste.ms/api/v1/hosts/GitHub/repositories"}},"issues_stats":{"full_name":"writecrow/corpus_text_processor","html_url":"https://github.com/writecrow/corpus_text_processor","last_synced_at":"2025-08-24T21:35:14.872Z","status":"error","issues_count":8,"pull_requests_count":3,"avg_time_to_close_issue":1489834.25,"avg_time_to_close_pull_request":89715.66666666667,"issues_closed_count":4,"pull_requests_closed_count":3,"pull_request_authors_count":2,"issue_authors_count":2,"avg_comments_per_issue":0.625,"avg_comments_per_pull_request":0.6666666666666666,"merged_pull_requests_count":2,"bot_issues_count":0,"bot_pull_requests_count":1,"past_year_issues_count":0,"past_year_pull_requests_count":0,"past_year_avg_time_to_close_issue":null,"past_year_avg_time_to_close_pull_request":null,"past_year_issues_closed_count":0,"past_year_pull_requests_closed_count":0,"past_year_pull_request_authors_count":0,"past_year_issue_authors_count":0,"past_year_avg_comments_per_issue":null,"past_year_avg_comments_per_pull_request":null,"past_year_bot_issues_count":0,"past_year_bot_pull_requests_count":0,"past_year_merged_pull_requests_count":0,"created_at":"2024-11-26T06:13:56.189Z","updated_at":"2025-08-24T21:35:14.873Z","repository_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/repositories/writecrow%2Fcorpus_text_processor","issues_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/repositories/writecrow%2Fcorpus_text_processor/issues","issue_labels_count":{"bug":1},"pull_request_labels_count":{"dependencies":1},"issue_author_associations_count":{"MEMBER":7,"NONE":1},"pull_request_author_associations_count":{"MEMBER":2,"NONE":1},"issue_authors":{"markfullmer":7,"unidonburi":1},"pull_request_authors":{"markfullmer":2,"dependabot[bot]":1},"host":{"name":"GitHub","url":"https://github.com","kind":"github","last_synced_at":"2025-09-04T00:00:25.939Z","repositories_count":10081047,"issues_count":31277651,"pull_requests_count":96104459,"authors_count":10693669,"icon_url":"https://github.com/github.png","host_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/repositories","owners_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/owners","authors_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors"},"past_year_issue_labels_count":{},"past_year_pull_request_labels_count":{},"past_year_issue_author_associations_count":{},"past_year_pull_request_author_associations_count":{},"past_year_issue_authors":{},"past_year_pull_request_authors":{},"maintainers":[{"login":"markfullmer","count":9,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/markfullmer"}],"active_maintainers":[]},"events":{"total":{"WatchEvent":2},"last_year":{"WatchEvent":2}},"keywords":["corpora","corpus-linguistics","desktop-app","text-processing"],"dependencies":[{"ecosystem":"pypi","filepath":"requirements.txt","sha":null,"kind":"manifest","created_at":"2022-08-24T15:51:09.427Z","updated_at":"2022-08-24T15:51:09.427Z","repository_link":"https://github.com/writecrow/corpus_text_processor/blob/main/requirements.txt","dependencies":[{"id":2526575999,"package_name":"beautifulsoup4","ecosystem":"pypi","requirements":"==4.8.0","direct":true,"kind":"runtime","optional":false},{"id":2526576000,"package_name":"chardet","ecosystem":"pypi","requirements":"==3.0.4","direct":true,"kind":"runtime","optional":false},{"id":2526576001,"package_name":"docx2txt","ecosystem":"pypi","requirements":"==0.8","direct":true,"kind":"runtime","optional":false},{"id":2526576002,"package_name":"pdf2image","ecosystem":"pypi","requirements":"==1.9.0","direct":true,"kind":"runtime","optional":false},{"id":2526576003,"package_name":"pdfminer.six","ecosystem":"pypi","requirements":"==20181108","direct":true,"kind":"runtime","optional":false},{"id":2526576004,"package_name":"pdf2docx","ecosystem":"pypi","requirements":"==0.5.2","direct":true,"kind":"runtime","optional":false},{"id":2526576005,"package_name":"PyPDF3","ecosystem":"pypi","requirements":"==1.0.1","direct":true,"kind":"runtime","optional":false},{"id":2526576006,"package_name":"PySimpleGUIQt","ecosystem":"pypi","requirements":"==0.28.0","direct":true,"kind":"runtime","optional":false},{"id":2526576007,"package_name":"PySimpleGUI","ecosystem":"pypi","requirements":"==4.38.0","direct":true,"kind":"runtime","optional":false},{"id":2526576008,"package_name":"python-pptx","ecosystem":"pypi","requirements":"==0.6.18","direct":true,"kind":"runtime","optional":false},{"id":2526576009,"package_name":"six","ecosystem":"pypi","requirements":"==1.12.0","direct":true,"kind":"runtime","optional":false},{"id":2526576010,"package_name":"tabulate","ecosystem":"pypi","requirements":"==0.8.6","direct":true,"kind":"runtime","optional":false},{"id":2526576011,"package_name":"striprtf","ecosystem":"pypi","requirements":"==0.0.8","direct":true,"kind":"runtime","optional":false}]},{"ecosystem":"pypi","filepath":"setup.py","sha":null,"kind":"manifest","created_at":"2022-08-24T15:51:09.535Z","updated_at":"2022-08-24T15:51:09.535Z","repository_link":"https://github.com/writecrow/corpus_text_processor/blob/main/setup.py","dependencies":[{"id":2526576324,"package_name":"https","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":2526576328,"package_name":"python-pptx","ecosystem":"pypi","requirements":"==0.6.6","direct":true,"kind":"runtime","optional":false},{"id":2526576330,"package_name":"antiword","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":2526576332,"package_name":"PySimpleGUI","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":2526576333,"package_name":"PySide2","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false}]}],"score":3.58351893845611,"created_at":"2025-09-04T15:50:31.912Z","updated_at":"2025-10-07T08:06:37.867Z","avatar_url":"https://github.com/writecrow.png","language":"Python","category":null,"sub_category":null,"monthly_downloads":0,"funding_links":[],"readme_doi_urls":[],"works":{},"citation_counts":{},"total_citations":0,"keywords_from_contributors":[],"project_url":"https://science.ecosyste.ms/api/v1/projects/9611","html_url":"https://science.ecosyste.ms/projects/9611","bibtex_url":"https://science.ecosyste.ms/projects/9611/export.bibtex","apalike_url":"https://science.ecosyste.ms/projects/9611/export.apalike"}