{"id":78265,"name":null,"description":"Easy Data Preparation with latest LLMs-based Operators and Pipelines.","url":"https://github.com/OpenDCAI/DataFlow","last_synced_at":"2025-09-08T21:18:11.340Z","repository":{"id":258919103,"uuid":"872005985","full_name":"OpenDCAI/DataFlow","owner":"OpenDCAI","description":"Easy Data Preparation with latest LLMs-based Operators and Pipelines.","archived":false,"fork":false,"pushed_at":"2025-09-01T06:44:58.000Z","size":78012,"stargazers_count":1178,"open_issues_count":13,"forks_count":77,"subscribers_count":15,"default_branch":"main","last_synced_at":"2025-09-01T08:57:51.023Z","etag":null,"topics":["data","data-agent","data-cleaning","data-pipelines","data-processing","data-science","data-synthesis","gradio-interface","llms","operators","quick-data-processing","sglang-bankend","vllm-backend"],"latest_commit_sha":null,"homepage":"https://OpenDCAI.github.io/DataFlow-Doc/","language":"Python","has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"apache-2.0","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/OpenDCAI.png","metadata":{"files":{"readme":"README-dev.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null,"zenodo":null,"notice":null,"maintainers":null,"copyright":null,"agents":null,"dco":null,"cla":null}},"created_at":"2024-10-13T14:45:45.000Z","updated_at":"2025-09-01T06:45:01.000Z","dependencies_parsed_at":"2025-08-13T13:01:08.504Z","dependency_job_id":"d63f8eae-5bf6-41fa-bb81-56f01b256c66","html_url":"https://github.com/OpenDCAI/DataFlow","commit_stats":null,"previous_names":["open-dataflow/open-dataflow-eval","open-dataflow/dataflow-eval-process","open-dataflow/dataflow","opendcai/dataflow"],"tags_count":7,"template":false,"template_full_name":null,"purl":"pkg:github/OpenDCAI/DataFlow","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/OpenDCAI%2FDataFlow","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/OpenDCAI%2FDataFlow/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/OpenDCAI%2FDataFlow/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/OpenDCAI%2FDataFlow/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/OpenDCAI","download_url":"https://codeload.github.com/OpenDCAI/DataFlow/tar.gz/refs/heads/main","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/OpenDCAI%2FDataFlow/sbom","scorecard":null,"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":274231508,"owners_count":25245625,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","status":"online","status_checked_at":"2025-09-08T02:00:09.813Z","response_time":121,"last_error":null,"robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":true,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"owner":{"login":"OpenDCAI","name":"OpenDCAI","uuid":"177615276","kind":"organization","description":"Define the future of Data-centric AI together","email":"PKU_DCML@hotmail.com","website":null,"location":null,"twitter":null,"company":null,"icon_url":"https://avatars.githubusercontent.com/u/177615276?v=4","repositories_count":1,"last_synced_at":"2025-06-28T09:33:27.481Z","metadata":{"has_sponsors_listing":false},"html_url":"https://github.com/OpenDCAI","funding_links":[],"total_stars":56,"followers":48,"following":0,"created_at":"2024-10-21T07:24:15.332Z","updated_at":"2025-06-28T09:33:27.481Z","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/OpenDCAI","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/OpenDCAI/repositories"},"packages":[],"commits":{"id":10957117,"full_name":"OpenDCAI/DataFlow","default_branch":"master","committers":null,"total_commits":null,"total_committers":null,"total_bot_commits":null,"total_bot_committers":null,"mean_commits":null,"dds":null,"past_year_committers":null,"past_year_total_commits":null,"past_year_total_committers":null,"past_year_total_bot_commits":null,"past_year_total_bot_committers":null,"past_year_mean_commits":null,"past_year_dds":null,"last_synced_at":null,"last_synced_commit":null,"created_at":"2025-09-05T21:57:36.500Z","updated_at":"2025-09-05T21:57:36.500Z","commits_url":"https://commits.ecosyste.ms/api/v1/hosts/GitHub/repositories/OpenDCAI%2FDataFlow/commits","host":{"name":"GitHub","url":"https://github.com","kind":"github","last_synced_at":"2025-09-08T00:00:16.827Z","repositories_count":5491971,"commits_count":855149458,"contributors_count":31219150,"owners_count":911999,"icon_url":"https://github.com/github.png","host_url":"https://commits.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://commits.ecosyste.ms/api/v1/hosts/GitHub/repositories"}},"issues_stats":{"full_name":"OpenDCAI/DataFlow","html_url":"https://github.com/OpenDCAI/DataFlow","last_synced_at":"2025-09-06T04:36:20.703Z","status":"active","issues_count":29,"pull_requests_count":117,"avg_time_to_close_issue":255022.66666666666,"avg_time_to_close_pull_request":35133.207317073175,"issues_closed_count":12,"pull_requests_closed_count":82,"pull_request_authors_count":27,"issue_authors_count":22,"avg_comments_per_issue":0.6551724137931034,"avg_comments_per_pull_request":0.19658119658119658,"merged_pull_requests_count":72,"bot_issues_count":0,"bot_pull_requests_count":0,"past_year_issues_count":29,"past_year_pull_requests_count":117,"past_year_avg_time_to_close_issue":255022.66666666666,"past_year_avg_time_to_close_pull_request":35133.207317073175,"past_year_issues_closed_count":12,"past_year_pull_requests_closed_count":82,"past_year_pull_request_authors_count":27,"past_year_issue_authors_count":22,"past_year_avg_comments_per_issue":0.6551724137931034,"past_year_avg_comments_per_pull_request":0.19658119658119658,"past_year_bot_issues_count":0,"past_year_bot_pull_requests_count":0,"past_year_merged_pull_requests_count":72,"created_at":"2025-07-16T12:12:47.879Z","updated_at":"2025-09-06T04:36:20.704Z","repository_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/repositories/OpenDCAI%2FDataFlow","issues_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/repositories/OpenDCAI%2FDataFlow/issues","issue_labels_count":{"bug":9,"enhancement":8,"question":4},"pull_request_labels_count":{"enhancement":1},"issue_author_associations_count":{"NONE":20,"CONTRIBUTOR":5,"COLLABORATOR":4},"pull_request_author_associations_count":{"CONTRIBUTOR":82,"COLLABORATOR":19,"NONE":16},"issue_authors":{"SunnyHaze":7,"ElsaReedz":2,"ninjaX2o":1,"foxFallingSkies":1,"linzx3501":1,"tpoisonooo":1,"samure1995":1,"jansonheal":1,"miharakator":1,"ixeraby":1,"ackevjajameda":1,"miaode74":1,"acie-1":1,"haolpku":1,"lcysyzxdxc":1,"jiaolongxue":1,"Sucran":1,"haoshao-web":1,"skygram":1,"hhsrx":1,"hastaluegoph":1,"jzzzzh":1},"pull_request_authors":{"haolpku":12,"zzy1127":12,"ZhaoyangHan04":11,"SunnyHaze":10,"Qmeiyi":8,"MOLYHECI":7,"wongzhenhao":7,"HeRunming":7,"scuuy":6,"TechNomad-ds":6,"gty1829":5,"YqjMartin":4,"DeepMindLiuZhou":4,"leaderwolfpipi":3,"Yalin-Feng":2,"Fengzhongzhihan":2,"xyxhchb":1,"JimmyAwoe":1,"CheinTian":1,"DeepThinkingZhouLiu":1,"yuwenkai2003":1,"TaolueWang-dev":1,"TheRoadQaQ":1,"yaodongwen":1,"Sucran":1,"mi-iro":1,"Niujunbo2002":1},"host":{"name":"GitHub","url":"https://github.com","kind":"github","last_synced_at":"2025-09-06T00:00:15.144Z","repositories_count":10119188,"issues_count":31685420,"pull_requests_count":98653205,"authors_count":10709554,"icon_url":"https://github.com/github.png","host_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/repositories","owners_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/owners","authors_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors"},"past_year_issue_labels_count":{"bug":9,"enhancement":8,"question":4},"past_year_pull_request_labels_count":{"enhancement":1},"past_year_issue_author_associations_count":{"NONE":20,"CONTRIBUTOR":5,"COLLABORATOR":4},"past_year_pull_request_author_associations_count":{"CONTRIBUTOR":82,"COLLABORATOR":19,"NONE":16},"past_year_issue_authors":{"SunnyHaze":7,"ElsaReedz":2,"tpoisonooo":1,"Sucran":1,"skygram":1,"samure1995":1,"ninjaX2o":1,"miharakator":1,"miaode74":1,"linzx3501":1,"lcysyzxdxc":1,"jzzzzh":1,"jiaolongxue":1,"jansonheal":1,"ixeraby":1,"hhsrx":1,"hastaluegoph":1,"haoshao-web":1,"haolpku":1,"foxFallingSkies":1,"ackevjajameda":1,"acie-1":1},"past_year_pull_request_authors":{"zzy1127":12,"haolpku":12,"ZhaoyangHan04":11,"SunnyHaze":10,"Qmeiyi":8,"MOLYHECI":7,"wongzhenhao":7,"HeRunming":7,"scuuy":6,"TechNomad-ds":6,"gty1829":5,"DeepMindLiuZhou":4,"YqjMartin":4,"leaderwolfpipi":3,"Fengzhongzhihan":2,"Yalin-Feng":2,"mi-iro":1,"TheRoadQaQ":1,"yaodongwen":1,"xyxhchb":1,"yuwenkai2003":1,"TaolueWang-dev":1,"Sucran":1,"Niujunbo2002":1,"JimmyAwoe":1,"CheinTian":1,"DeepThinkingZhouLiu":1},"maintainers":[{"login":"SunnyHaze","count":13,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/SunnyHaze"},{"login":"MOLYHECI","count":7,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/MOLYHECI"},{"login":"leaderwolfpipi","count":3,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/leaderwolfpipi"}],"active_maintainers":[{"login":"SunnyHaze","count":13,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/SunnyHaze"},{"login":"MOLYHECI","count":7,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/MOLYHECI"},{"login":"leaderwolfpipi","count":3,"url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors/leaderwolfpipi"}]},"events":{"total":{"CreateEvent":16,"IssuesEvent":42,"ReleaseEvent":5,"WatchEvent":571,"DeleteEvent":5,"MemberEvent":1,"IssueCommentEvent":71,"PushEvent":132,"PullRequestReviewCommentEvent":30,"PullRequestReviewEvent":48,"PullRequestEvent":214,"ForkEvent":49},"last_year":{"CreateEvent":16,"IssuesEvent":42,"ReleaseEvent":5,"WatchEvent":571,"DeleteEvent":5,"MemberEvent":1,"IssueCommentEvent":71,"PushEvent":132,"PullRequestReviewCommentEvent":30,"PullRequestReviewEvent":48,"PullRequestEvent":214,"ForkEvent":49}},"keywords":["data","data-agent","data-cleaning","data-pipelines","data-processing","data-science","data-synthesis","gradio-interface","llms","operators","quick-data-processing","sglang-bankend","vllm-backend"],"dependencies":[{"ecosystem":"pypi","filepath":"requirements.txt","sha":null,"kind":"manifest","created_at":"2024-10-21T07:37:55.381Z","updated_at":"2024-10-21T07:37:55.381Z","repository_link":"https://github.com/OpenDCAI/DataFlow/blob/main/requirements.txt","dependencies":[{"id":20320420366,"package_name":"av","ecosystem":"pypi","requirements":"==12.3.0","direct":true,"kind":"runtime","optional":false},{"id":20320420367,"package_name":"decord","ecosystem":"pypi","requirements":"==0.6.0","direct":true,"kind":"runtime","optional":false},{"id":20320420368,"package_name":"einops","ecosystem":"pypi","requirements":"==0.8.0","direct":true,"kind":"runtime","optional":false},{"id":20320420369,"package_name":"fasttext","ecosystem":"pypi","requirements":"==0.9.3","direct":true,"kind":"runtime","optional":false},{"id":20320420382,"package_name":"filelock","ecosystem":"pypi","requirements":"==3.15.4","direct":true,"kind":"runtime","optional":false},{"id":20320420383,"package_name":"fsspec","ecosystem":"pypi","requirements":"==2024.6.1","direct":true,"kind":"runtime","optional":false},{"id":20320420384,"package_name":"ftfy","ecosystem":"pypi","requirements":"==6.2.3","direct":true,"kind":"runtime","optional":false},{"id":20320420385,"package_name":"google-api-core","ecosystem":"pypi","requirements":"==2.19.1","direct":true,"kind":"runtime","optional":false},{"id":20320420386,"package_name":"google-api-python-client","ecosystem":"pypi","requirements":"==2.140.0","direct":true,"kind":"runtime","optional":false},{"id":20320420387,"package_name":"google-auth","ecosystem":"pypi","requirements":"==2.33.0","direct":true,"kind":"runtime","optional":false},{"id":20320420388,"package_name":"google-auth-httplib2","ecosystem":"pypi","requirements":"==0.2.0","direct":true,"kind":"runtime","optional":false},{"id":20320420389,"package_name":"googleapis-common-protos","ecosystem":"pypi","requirements":"==1.63.2","direct":true,"kind":"runtime","optional":false},{"id":20320420390,"package_name":"jsonargparse","ecosystem":"pypi","requirements":"==4.32.0","direct":true,"kind":"runtime","optional":false},{"id":20320420395,"package_name":"kenlm","ecosystem":"pypi","requirements":"==0.2.0","direct":true,"kind":"runtime","optional":false},{"id":20320420696,"package_name":"langkit","ecosystem":"pypi","requirements":"==0.0.33","direct":true,"kind":"runtime","optional":false},{"id":20320421280,"package_name":"loguru","ecosystem":"pypi","requirements":"==0.7.2","direct":true,"kind":"runtime","optional":false},{"id":20320421281,"package_name":"matplotlib","ecosystem":"pypi","requirements":"==3.9.2","direct":true,"kind":"runtime","optional":false},{"id":20320421282,"package_name":"multiprocess","ecosystem":"pypi","requirements":"==0.70.16","direct":true,"kind":"runtime","optional":false},{"id":20320421283,"package_name":"nltk","ecosystem":"pypi","requirements":"==3.8","direct":true,"kind":"runtime","optional":false},{"id":20320421284,"package_name":"numpy","ecosystem":"pypi","requirements":"==1.26.4","direct":true,"kind":"runtime","optional":false},{"id":20320421285,"package_name":"openai","ecosystem":"pypi","requirements":"=1.44.1","direct":true,"kind":"runtime","optional":false},{"id":20320421286,"package_name":"pandas","ecosystem":"pypi","requirements":"==2.2.2","direct":true,"kind":"runtime","optional":false},{"id":20320421287,"package_name":"prettytable","ecosystem":"pypi","requirements":"==3.11.0","direct":true,"kind":"runtime","optional":false},{"id":20320421288,"package_name":"pyspark","ecosystem":"pypi","requirements":"==3.5.2","direct":true,"kind":"runtime","optional":false},{"id":20320421289,"package_name":"PyYAML","ecosystem":"pypi","requirements":"==6.0.2","direct":true,"kind":"runtime","optional":false},{"id":20320421290,"package_name":"regex","ecosystem":"pypi","requirements":"==2024.7.24","direct":true,"kind":"runtime","optional":false},{"id":20320421291,"package_name":"safetensors","ecosystem":"pypi","requirements":"==0.4.4","direct":true,"kind":"runtime","optional":false},{"id":20320421292,"package_name":"scikit-learn","ecosystem":"pypi","requirements":"==1.5.1","direct":true,"kind":"runtime","optional":false},{"id":20320421293,"package_name":"scikit-video","ecosystem":"pypi","requirements":"==1.1.11","direct":true,"kind":"runtime","optional":false},{"id":20320421294,"package_name":"scipy","ecosystem":"pypi","requirements":"==1.13.1","direct":true,"kind":"runtime","optional":false},{"id":20320421295,"package_name":"sentencepiece","ecosystem":"pypi","requirements":"==0.2.0","direct":true,"kind":"runtime","optional":false},{"id":20320421296,"package_name":"setuptools","ecosystem":"pypi","requirements":"==72.1.0","direct":true,"kind":"runtime","optional":false},{"id":20320421297,"package_name":"timm","ecosystem":"pypi","requirements":"==1.0.8","direct":true,"kind":"runtime","optional":false},{"id":20320421298,"package_name":"torch","ecosystem":"pypi","requirements":"==2.4.0","direct":true,"kind":"runtime","optional":false},{"id":20320421299,"package_name":"torchvision","ecosystem":"pypi","requirements":"==0.19.0","direct":true,"kind":"runtime","optional":false},{"id":20320421300,"package_name":"tqdm","ecosystem":"pypi","requirements":"==4.66.5","direct":true,"kind":"runtime","optional":false},{"id":20320421301,"package_name":"transformers","ecosystem":"pypi","requirements":"==4.44.2","direct":true,"kind":"runtime","optional":false},{"id":20320421302,"package_name":"vendi-score","ecosystem":"pypi","requirements":"==0.0.3","direct":true,"kind":"runtime","optional":false},{"id":20320421308,"package_name":"vllm","ecosystem":"pypi","requirements":"==0.6.0","direct":true,"kind":"runtime","optional":false},{"id":20320421309,"package_name":"wget","ecosystem":"pypi","requirements":"==3.2","direct":true,"kind":"runtime","optional":false}]},{"ecosystem":"actions","filepath":".github/workflows/python-publish.yml","sha":null,"kind":"manifest","created_at":"2025-07-12T19:24:40.565Z","updated_at":"2025-07-12T19:24:40.565Z","repository_link":"https://github.com/OpenDCAI/DataFlow/blob/main/.github/workflows/python-publish.yml","dependencies":[{"id":24111715587,"package_name":"actions/checkout","ecosystem":"actions","requirements":"v4","direct":true,"kind":"composite","optional":false},{"id":24111715588,"package_name":"actions/setup-python","ecosystem":"actions","requirements":"v5","direct":true,"kind":"composite","optional":false},{"id":24111715589,"package_name":"actions/upload-artifact","ecosystem":"actions","requirements":"v4","direct":true,"kind":"composite","optional":false},{"id":24111715590,"package_name":"actions/download-artifact","ecosystem":"actions","requirements":"v4","direct":true,"kind":"composite","optional":false},{"id":24111715591,"package_name":"pypa/gh-action-pypi-publish","ecosystem":"actions","requirements":"release/v1","direct":true,"kind":"composite","optional":false}]},{"ecosystem":"actions","filepath":".github/workflows/test.yml","sha":null,"kind":"manifest","created_at":"2025-07-12T19:24:40.633Z","updated_at":"2025-07-12T19:24:40.633Z","repository_link":"https://github.com/OpenDCAI/DataFlow/blob/main/.github/workflows/test.yml","dependencies":[{"id":24111715652,"package_name":"actions/checkout","ecosystem":"actions","requirements":"v4","direct":true,"kind":"composite","optional":false},{"id":24111715654,"package_name":"actions/setup-python","ecosystem":"actions","requirements":"v3","direct":true,"kind":"composite","optional":false}]},{"ecosystem":"pypi","filepath":"pyproject.toml","sha":null,"kind":"manifest","created_at":"2025-07-12T19:24:40.702Z","updated_at":"2025-07-12T19:24:40.702Z","repository_link":"https://github.com/OpenDCAI/DataFlow/blob/main/pyproject.toml","dependencies":[]},{"ecosystem":"pypi","filepath":"requirements-kbc.txt","sha":null,"kind":"manifest","created_at":"2025-07-12T19:24:40.746Z","updated_at":"2025-07-12T19:24:40.746Z","repository_link":"https://github.com/OpenDCAI/DataFlow/blob/main/requirements-kbc.txt","dependencies":[{"id":24111715700,"package_name":"fairy-doc","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111715710,"package_name":"chonkie","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111715719,"package_name":"trafilatura","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false}]},{"ecosystem":"pypi","filepath":"requirements-muxi.txt","sha":null,"kind":"manifest","created_at":"2025-07-12T19:24:41.274Z","updated_at":"2025-07-12T19:24:41.274Z","repository_link":"https://github.com/OpenDCAI/DataFlow/blob/main/requirements-muxi.txt","dependencies":[{"id":24111715720,"package_name":"datasets","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111715721,"package_name":"numpy","ecosystem":"pypi","requirements":"\u003c2.0.0","direct":true,"kind":"runtime","optional":false},{"id":24111715722,"package_name":"scipy","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111715723,"package_name":"torch","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111715724,"package_name":"tqdm","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111715725,"package_name":"transformers","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111715726,"package_name":"aisuite","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111716171,"package_name":"math_verify","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111717300,"package_name":"word2number","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111718164,"package_name":"accelerate","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111718165,"package_name":"rapidfuzz","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111718166,"package_name":"colorlog","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111718167,"package_name":"appdirs","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111718168,"package_name":"datasketch","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111719649,"package_name":"modelscope","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111719650,"package_name":"addict","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111719651,"package_name":"pytest","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false}]},{"ecosystem":"pypi","filepath":"requirements-text.txt","sha":null,"kind":"manifest","created_at":"2025-07-12T19:24:44.958Z","updated_at":"2025-07-12T19:24:44.958Z","repository_link":"https://github.com/OpenDCAI/DataFlow/blob/main/requirements-text.txt","dependencies":[{"id":24111720063,"package_name":"fasttext","ecosystem":"pypi","requirements":"==0.9.3","direct":true,"kind":"runtime","optional":false},{"id":24111721006,"package_name":"filelock","ecosystem":"pypi","requirements":"==3.15.4","direct":true,"kind":"runtime","optional":false},{"id":24111721007,"package_name":"google-api-core","ecosystem":"pypi","requirements":"==2.19.1","direct":true,"kind":"runtime","optional":false},{"id":24111721008,"package_name":"google-api-python-client","ecosystem":"pypi","requirements":"==2.140.0","direct":true,"kind":"runtime","optional":false},{"id":24111721009,"package_name":"google-auth","ecosystem":"pypi","requirements":"==2.33.0","direct":true,"kind":"runtime","optional":false},{"id":24111721010,"package_name":"google-auth-httplib2","ecosystem":"pypi","requirements":"==0.2.0","direct":true,"kind":"runtime","optional":false},{"id":24111721011,"package_name":"googleapis-common-protos","ecosystem":"pypi","requirements":"==1.63.2","direct":true,"kind":"runtime","optional":false},{"id":24111721012,"package_name":"kenlm","ecosystem":"pypi","requirements":"==0.3.0","direct":true,"kind":"runtime","optional":false},{"id":24111722759,"package_name":"langkit","ecosystem":"pypi","requirements":"==0.0.33","direct":true,"kind":"runtime","optional":false},{"id":24111723780,"package_name":"loguru","ecosystem":"pypi","requirements":"==0.7.2","direct":true,"kind":"runtime","optional":false},{"id":24111723781,"package_name":"matplotlib","ecosystem":"pypi","requirements":"==3.9.2","direct":true,"kind":"runtime","optional":false},{"id":24111723782,"package_name":"multiprocess","ecosystem":"pypi","requirements":"==0.70.16","direct":true,"kind":"runtime","optional":false},{"id":24111723783,"package_name":"openai","ecosystem":"pypi","requirements":"=","direct":true,"kind":"runtime","optional":false},{"id":24111723784,"package_name":"prettytable","ecosystem":"pypi","requirements":"==3.11.0","direct":true,"kind":"runtime","optional":false},{"id":24111723785,"package_name":"pyspark","ecosystem":"pypi","requirements":"==3.5.2","direct":true,"kind":"runtime","optional":false},{"id":24111723786,"package_name":"sentencepiece","ecosystem":"pypi","requirements":"==0.2.0","direct":true,"kind":"runtime","optional":false},{"id":24111723787,"package_name":"vendi-score","ecosystem":"pypi","requirements":"==0.0.3","direct":true,"kind":"runtime","optional":false},{"id":24111725349,"package_name":"wget","ecosystem":"pypi","requirements":"==3.2","direct":true,"kind":"runtime","optional":false},{"id":24111725350,"package_name":"nltk","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111725351,"package_name":"presidio_analyzer","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111725359,"package_name":"presidio_anonymizer","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111725360,"package_name":"gdown","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111725361,"package_name":"simhash","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111725363,"package_name":"datasketch","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111725364,"package_name":"gensim","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111725408,"package_name":"pot","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111726156,"package_name":"hlepor","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111726244,"package_name":"nptyping","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111726798,"package_name":"sacrebleu","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false},{"id":24111726799,"package_name":"bert_score","ecosystem":"pypi","requirements":"*","direct":true,"kind":"runtime","optional":false}]}],"score":null,"created_at":"2025-09-08T20:55:08.393Z","updated_at":"2025-10-07T08:28:00.002Z","avatar_url":"https://github.com/OpenDCAI.png","language":"Python","category":null,"sub_category":null,"monthly_downloads":0,"funding_links":[],"readme_doi_urls":[],"works":{},"citation_counts":{},"total_citations":0,"keywords_from_contributors":[],"project_url":"https://science.ecosyste.ms/api/v1/projects/78265","html_url":"https://science.ecosyste.ms/projects/78265"}