{"id":104949,"name":null,"description":"A Survey on Data Selection for Language Models","url":"https://github.com/alon-albalak/data-selection-survey","last_synced_at":"2025-09-09T04:14:13.542Z","repository":{"id":224114596,"uuid":"762454923","full_name":"alon-albalak/data-selection-survey","owner":"alon-albalak","description":"A Survey on Data Selection for Language Models","archived":false,"fork":false,"pushed_at":"2025-04-29T00:35:04.000Z","size":1627,"stargazers_count":227,"open_issues_count":1,"forks_count":13,"subscribers_count":5,"default_branch":"main","last_synced_at":"2025-04-29T01:36:43.998Z","etag":null,"topics":["data-selection","language-model","llm","survey"],"latest_commit_sha":null,"homepage":"","language":null,"has_issues":true,"has_wiki":null,"has_pages":null,"mirror_url":null,"source_name":null,"license":"cc0-1.0","status":null,"scm":"git","pull_requests_enabled":true,"icon_url":"https://github.com/alon-albalak.png","metadata":{"files":{"readme":"README.md","changelog":null,"contributing":null,"funding":null,"license":"LICENSE","code_of_conduct":null,"threat_model":null,"audit":null,"citation":null,"codeowners":null,"security":null,"support":null,"governance":null,"roadmap":null,"authors":null,"dei":null,"publiccode":null,"codemeta":null,"zenodo":null}},"created_at":"2024-02-23T20:20:34.000Z","updated_at":"2025-04-29T00:35:07.000Z","dependencies_parsed_at":"2024-02-23T21:31:48.784Z","dependency_job_id":"ea7a43cc-c398-423c-b679-ae8f3c6b20a3","html_url":"https://github.com/alon-albalak/data-selection-survey","commit_stats":null,"previous_names":["alon-albalak/data-selection-survey"],"tags_count":0,"template":false,"template_full_name":null,"purl":"pkg:github/alon-albalak/data-selection-survey","repository_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/alon-albalak%2Fdata-selection-survey","tags_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/alon-albalak%2Fdata-selection-survey/tags","releases_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/alon-albalak%2Fdata-selection-survey/releases","manifests_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/alon-albalak%2Fdata-selection-survey/manifests","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/alon-albalak","download_url":"https://codeload.github.com/alon-albalak/data-selection-survey/tar.gz/refs/heads/main","sbom_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories/alon-albalak%2Fdata-selection-survey/sbom","scorecard":null,"host":{"name":"GitHub","url":"https://github.com","kind":"github","repositories_count":274243079,"owners_count":25248148,"icon_url":"https://github.com/github.png","version":null,"created_at":"2022-05-30T11:31:42.601Z","updated_at":"2022-07-04T15:15:14.044Z","status":"online","status_checked_at":"2025-09-09T02:00:10.223Z","response_time":80,"last_error":null,"robots_txt_status":"success","robots_txt_updated_at":"2025-07-24T06:49:26.215Z","robots_txt_url":"https://github.com/robots.txt","online":true,"can_crawl_api":true,"host_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repositories","repository_names_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/repository_names","owners_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners"}},"owner":{"login":"alon-albalak","name":"Alon Albalak","uuid":"25569766","kind":"user","description":"PhD student, Natural Language Processing and Deep Learning","email":"","website":"https://alon-albalak.github.io/","location":"Santa Barbara, CA","twitter":"AlbalakAlon","company":null,"icon_url":"https://avatars.githubusercontent.com/u/25569766?u=d5e6f90d2bd8873fb1cd463cf0ae0d9d331fc453\u0026v=4","repositories_count":5,"last_synced_at":"2023-03-04T13:55:28.975Z","metadata":{"has_sponsors_listing":false},"html_url":"https://github.com/alon-albalak","funding_links":[],"total_stars":null,"followers":null,"following":null,"created_at":"2022-11-14T19:25:59.871Z","updated_at":"2023-03-04T13:55:28.986Z","owner_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/alon-albalak","repositories_url":"https://repos.ecosyste.ms/api/v1/hosts/GitHub/owners/alon-albalak/repositories"},"packages":null,"commits":{"message":"Repository syncing started."},"issues_stats":{"full_name":"alon-albalak/data-selection-survey","html_url":"https://github.com/alon-albalak/data-selection-survey","last_synced_at":null,"status":null,"issues_count":null,"pull_requests_count":null,"avg_time_to_close_issue":null,"avg_time_to_close_pull_request":null,"issues_closed_count":null,"pull_requests_closed_count":null,"pull_request_authors_count":null,"issue_authors_count":null,"avg_comments_per_issue":null,"avg_comments_per_pull_request":null,"merged_pull_requests_count":null,"bot_issues_count":null,"bot_pull_requests_count":null,"past_year_issues_count":null,"past_year_pull_requests_count":null,"past_year_avg_time_to_close_issue":null,"past_year_avg_time_to_close_pull_request":null,"past_year_issues_closed_count":null,"past_year_pull_requests_closed_count":null,"past_year_pull_request_authors_count":null,"past_year_issue_authors_count":null,"past_year_avg_comments_per_issue":null,"past_year_avg_comments_per_pull_request":null,"past_year_bot_issues_count":null,"past_year_bot_pull_requests_count":null,"past_year_merged_pull_requests_count":null,"created_at":"2025-08-31T02:44:13.238Z","updated_at":"2025-09-02T12:11:23.728Z","repository_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/repositories/alon-albalak%2Fdata-selection-survey","issues_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/repositories/alon-albalak%2Fdata-selection-survey/issues","issue_labels_count":{},"pull_request_labels_count":{},"issue_author_associations_count":{"NONE":1},"pull_request_author_associations_count":{"NONE":3},"issue_authors":{"dheeraj7596":1},"pull_request_authors":{"koalazf99":1,"jwmueller":1,"ZifanL":1},"host":{"name":"GitHub","url":"https://github.com","kind":"github","last_synced_at":"2025-09-09T00:00:11.778Z","repositories_count":10176276,"issues_count":31902430,"pull_requests_count":99999179,"authors_count":10735433,"icon_url":"https://github.com/github.png","host_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub","repositories_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/repositories","owners_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/owners","authors_url":"https://issues.ecosyste.ms/api/v1/hosts/GitHub/authors"},"past_year_issue_labels_count":{},"past_year_pull_request_labels_count":{},"past_year_issue_author_associations_count":{},"past_year_pull_request_author_associations_count":{"NONE":2},"past_year_issue_authors":{},"past_year_pull_request_authors":{"koalazf99":1,"ZifanL":1},"maintainers":[],"active_maintainers":[]},"events":{"total":{"WatchEvent":75,"PushEvent":1,"PullRequestReviewEvent":1,"PullRequestEvent":3,"ForkEvent":5},"last_year":{"WatchEvent":75,"PushEvent":1,"PullRequestReviewEvent":1,"PullRequestEvent":3,"ForkEvent":5}},"keywords":["data-selection","language-model","llm","survey"],"dependencies":[],"score":null,"created_at":"2025-09-08T22:20:58.831Z","updated_at":"2025-10-07T08:31:36.832Z","avatar_url":"https://github.com/alon-albalak.png","language":null,"category":null,"sub_category":null,"monthly_downloads":0,"funding_links":[],"readme_doi_urls":["http://dx.doi.org/10.1126/science.abq1158","https://doi.org/10.1145/362686.362692","https://doi.org/10.1137/0222058","https://doi.org/10.1145/509907.509965","https://doi.org/10.1145/1645953.1646283","https://doi.org/10.1145/3581641.3584059"],"works":{},"citation_counts":{},"total_citations":0,"keywords_from_contributors":[],"project_url":"https://science.ecosyste.ms/api/v1/projects/104949","html_url":"https://science.ecosyste.ms/projects/104949"}