Science Score: 44.0%
This score indicates how likely this project is to be science-related based on various indicators:
-
✓CITATION.cff file
Found CITATION.cff file -
✓codemeta.json file
Found codemeta.json file -
✓.zenodo.json file
Found .zenodo.json file -
○DOI references
-
○Academic publication links
-
○Academic email domains
-
○Institutional organization owner
-
○JOSS paper metadata
-
○Scientific vocabulary similarity
Low similarity (2.2%) to scientific vocabulary
Repository
diffusers 0.33.1 for TPU t2v i2v migration
Basic Info
- Host: GitHub
- Owner: shungcp
- License: apache-2.0
- Language: Python
- Default Branch: main
- Size: 7.7 MB
Statistics
- Stars: 0
- Watchers: 0
- Forks: 6
- Open Issues: 1
- Releases: 0
Metadata Files
README.md
Original readme moved to README_original.md
install
Install dependencies, setup virtual env first if required.
sh
sh -ex setup-dep.sh
To run:
python wan_tx_splash_attn.py
Progress:
(Jun 17)
┏━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┓
┃ Device ┃ Memory usage ┃ Duty cycle ┃
┡━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━┩
│ 0 │ 2.09 GiB / 31.25 GiB │ 0.00% │
│ 1 │ 2.08 GiB / 31.25 GiB │ 0.00% │
│ 2 │ 2.08 GiB / 31.25 GiB │ 0.00% │
│ 3 │ 2.08 GiB / 31.25 GiB │ 0.00% │
│ 4 │ 2.08 GiB / 31.25 GiB │ 0.00% │
│ 5 │ 2.08 GiB / 31.25 GiB │ 0.00% │
│ 6 │ 2.08 GiB / 31.25 GiB │ 0.00% │
│ 7 │ 2.08 GiB / 31.25 GiB │ 0.00% │
sizes:
wan 1.3B:
text_encoder 12.537574768066406 G transformer 2.64891254901886 G vae 0.23635575734078884 G
wan 14B
text_encoder 12.537574768066406 G transformer 26.66874897480011 G vae 0.23635575734078884 G
Shapes of weights for 1.3B model:
vae
encoder.conv_in.weight : # (torch.Size([96, 3, 3, 3, 3]), torch.bfloat16)
encoder.conv_in.bias : # (torch.Size([96]), torch.bfloat16)
encoder.down_blocks.*.norm*.gamma : # (torch.Size([384, 1, 1, 1]), torch.bfloat16)
encoder.down_blocks.*.conv*.weight : # (torch.Size([384, 384, 3, 3, 3]), torch.bfloat16)
encoder.down_blocks.*.conv*.bias : # (torch.Size([384]), torch.bfloat16)
encoder.down_blocks.*.resample.*.weight : # (torch.Size([384, 384, 3, 3]), torch.bfloat16)
encoder.down_blocks.*.resample.*.bias : # (torch.Size([384]), torch.bfloat16)
encoder.down_blocks.*.conv_shortcut.weight : # (torch.Size([384, 192, 1, 1, 1]), torch.bfloat16)
encoder.down_blocks.*.conv_shortcut.bias : # (torch.Size([384]), torch.bfloat16)
encoder.down_blocks.*.time_conv.weight : # (torch.Size([384, 384, 3, 1, 1]), torch.bfloat16)
encoder.down_blocks.*.time_conv.bias : # (torch.Size([384]), torch.bfloat16)
encoder.mid_block.attentions.*.norm.gamma : # (torch.Size([384, 1, 1]), torch.bfloat16)
encoder.mid_block.attentions.*.to_qkv.weight : # (torch.Size([1152, 384, 1, 1]), torch.bfloat16)
encoder.mid_block.attentions.*.to_qkv.bias : # (torch.Size([1152]), torch.bfloat16)
encoder.mid_block.attentions.*.proj.weight : # (torch.Size([384, 384, 1, 1]), torch.bfloat16)
encoder.mid_block.attentions.*.proj.bias : # (torch.Size([384]), torch.bfloat16)
encoder.mid_block.resnets.*.norm*.gamma : # (torch.Size([384, 1, 1, 1]), torch.bfloat16)
encoder.mid_block.resnets.*.conv*.weight : # (torch.Size([384, 384, 3, 3, 3]), torch.bfloat16)
encoder.mid_block.resnets.*.conv*.bias : # (torch.Size([384]), torch.bfloat16)
encoder.norm_out.gamma : # (torch.Size([384, 1, 1, 1]), torch.bfloat16)
encoder.conv_out.weight : # (torch.Size([32, 384, 3, 3, 3]), torch.bfloat16)
encoder.conv_out.bias : # (torch.Size([32]), torch.bfloat16)
quant_conv.weight : # (torch.Size([32, 32, 1, 1, 1]), torch.bfloat16)
quant_conv.bias : # (torch.Size([32]), torch.bfloat16)
post_quant_conv.weight : # (torch.Size([16, 16, 1, 1, 1]), torch.bfloat16)
post_quant_conv.bias : # (torch.Size([16]), torch.bfloat16)
decoder.conv_in.weight : # (torch.Size([384, 16, 3, 3, 3]), torch.bfloat16)
decoder.conv_in.bias : # (torch.Size([384]), torch.bfloat16)
decoder.mid_block.attentions.*.norm.gamma : # (torch.Size([384, 1, 1]), torch.bfloat16)
decoder.mid_block.attentions.*.to_qkv.weight : # (torch.Size([1152, 384, 1, 1]), torch.bfloat16)
decoder.mid_block.attentions.*.to_qkv.bias : # (torch.Size([1152]), torch.bfloat16)
decoder.mid_block.attentions.*.proj.weight : # (torch.Size([384, 384, 1, 1]), torch.bfloat16)
decoder.mid_block.attentions.*.proj.bias : # (torch.Size([384]), torch.bfloat16)
decoder.mid_block.resnets.*.norm*.gamma : # (torch.Size([384, 1, 1, 1]), torch.bfloat16)
decoder.mid_block.resnets.*.conv*.weight : # (torch.Size([384, 384, 3, 3, 3]), torch.bfloat16)
decoder.mid_block.resnets.*.conv*.bias : # (torch.Size([384]), torch.bfloat16)
decoder.up_blocks.*.resnets.*.norm*.gamma : # (torch.Size([96, 1, 1, 1]), torch.bfloat16)
decoder.up_blocks.*.resnets.*.conv*.weight : # (torch.Size([96, 96, 3, 3, 3]), torch.bfloat16)
decoder.up_blocks.*.resnets.*.conv*.bias : # (torch.Size([96]), torch.bfloat16)
decoder.up_blocks.*.upsamplers.*.resample.*.weight : # (torch.Size([96, 192, 3, 3]), torch.bfloat16)
decoder.up_blocks.*.upsamplers.*.resample.*.bias : # (torch.Size([96]), torch.bfloat16)
decoder.up_blocks.*.upsamplers.*.time_conv.weight : # (torch.Size([768, 384, 3, 1, 1]), torch.bfloat16)
decoder.up_blocks.*.upsamplers.*.time_conv.bias : # (torch.Size([768]), torch.bfloat16)
decoder.up_blocks.*.resnets.*.conv_shortcut.weight : # (torch.Size([384, 192, 1, 1, 1]), torch.bfloat16)
decoder.up_blocks.*.resnets.*.conv_shortcut.bias : # (torch.Size([384]), torch.bfloat16)
decoder.norm_out.gamma : # (torch.Size([96, 1, 1, 1]), torch.bfloat16)
decoder.conv_out.weight : # (torch.Size([3, 96, 3, 3, 3]), torch.bfloat16)
decoder.conv_out.bias : # (torch.Size([3]), torch.bfloat16)
transformer
scale_shift_table : # (torch.Size([1, 2, 1536]), torch.float32)
patch_embedding.weight : # (torch.Size([1536, 16, 1, 2, 2]), torch.bfloat16)
patch_embedding.bias : # (torch.Size([1536]), torch.bfloat16)
condition_embedder.time_embedder.linear_*.weight : # (torch.Size([1536, 1536]), torch.float32)
condition_embedder.time_embedder.linear_*.bias : # (torch.Size([1536]), torch.float32)
condition_embedder.time_proj.weight : # (torch.Size([9216, 1536]), torch.bfloat16)
condition_embedder.time_proj.bias : # (torch.Size([9216]), torch.bfloat16)
condition_embedder.text_embedder.linear_*.weight : # (torch.Size([1536, 1536]), torch.bfloat16)
condition_embedder.text_embedder.linear_*.bias : # (torch.Size([1536]), torch.bfloat16)
blocks.*.scale_shift_table : # (torch.Size([1, 6, 1536]), torch.float32)
blocks.*.attn*.norm_q.weight : # (torch.Size([1536]), torch.bfloat16)
blocks.*.attn*.norm_k.weight : # (torch.Size([1536]), torch.bfloat16)
blocks.*.attn*.to_q.weight : # (torch.Size([1536, 1536]), torch.bfloat16)
blocks.*.attn*.to_q.bias : # (torch.Size([1536]), torch.bfloat16)
blocks.*.attn*.to_k.weight : # (torch.Size([1536, 1536]), torch.bfloat16)
blocks.*.attn*.to_k.bias : # (torch.Size([1536]), torch.bfloat16)
blocks.*.attn*.to_v.weight : # (torch.Size([1536, 1536]), torch.bfloat16)
blocks.*.attn*.to_v.bias : # (torch.Size([1536]), torch.bfloat16)
blocks.*.attn*.to_out.*.weight : # (torch.Size([1536, 1536]), torch.bfloat16)
blocks.*.attn*.to_out.*.bias : # (torch.Size([1536]), torch.bfloat16)
blocks.*.norm*.weight : # (torch.Size([1536]), torch.float32)
blocks.*.norm*.bias : # (torch.Size([1536]), torch.float32)
blocks.*.ffn.net.*.proj.weight : # (torch.Size([8960, 1536]), torch.bfloat16)
blocks.*.ffn.net.*.proj.bias : # (torch.Size([8960]), torch.bfloat16)
blocks.*.ffn.net.*.weight : # (torch.Size([1536, 8960]), torch.bfloat16)
blocks.*.ffn.net.*.bias : # (torch.Size([1536]), torch.bfloat16)
proj_out.weight : # (torch.Size([64, 1536]), torch.bfloat16)
proj_out.bias : # (torch.Size([64]), torch.bfloat16)
text encoder
shared.weight : # (torch.Size([256384, 4096]), torch.bfloat16)
encoder.block.*.layer.*.SelfAttention.q.weight : # (torch.Size([4096, 4096]), torch.bfloat16)
encoder.block.*.layer.*.SelfAttention.k.weight : # (torch.Size([4096, 4096]), torch.bfloat16)
encoder.block.*.layer.*.SelfAttention.v.weight : # (torch.Size([4096, 4096]), torch.bfloat16)
encoder.block.*.layer.*.SelfAttention.o.weight : # (torch.Size([4096, 4096]), torch.bfloat16)
encoder.block.*.layer.*.SelfAttention.relative_attention_bias.weight : # (torch.Size([32, 64]), torch.bfloat16)
encoder.block.*.layer.*.layer_norm.weight : # (torch.Size([4096]), torch.bfloat16)
encoder.block.*.layer.*.DenseReluDense.wi_*.weight : # (torch.Size([10240, 4096]), torch.bfloat16)
encoder.block.*.layer.*.DenseReluDense.wo.weight : # (torch.Size([4096, 10240]), torch.bfloat16)
encoder.final_layer_norm.weight : # (torch.Size([4096]), torch.bfloat16)
Add a unit test for the flash attention inconsistency issue
``` python compare_fa_sharding_consistency.py
...
FINAL CONCLUSION
==============================
...
```
wan_tx_splash_attn.py combines the JAX Pallas splash attention with the maxdiffusion VAE decoder
```
on v6e-8:
(venv)$ python wan_tx_splash_attn.py
Load and port Wan 2.1 VAE on tpu
Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████| 12/12 [00:01<00:00, 10.11it/s]
Loading checkpoint shards: 100%|████████████████████████████████████████████████████████████████| 5/5 [00:01<00:00, 4.60it/s]
Loading pipeline components...: 100%|███████████████████████████████████████████████████████████| 5/5 [00:03<00:00, 1.63it/s]
`loss_type=None` was set in the config but it is unrecognised. Using the default loss: `ForCausalLMLoss`.
Number of devices is:, 8
text_encoder 12.537574768066406 G
transformer 26.66874897480011 G
vae (JAX VAE) - size calculation not implemented
return lax_numpy.astype(self, dtype, copy=copy, device=device)
100%|█████████████████████████████████████████████████████████████████████████████████████████| 50/50 [08:16<00:00, 9.94s/it]
numpy shape: (720, 1280, 3, 81)
100%|█████████████████████████████████████████████████████████████████████████████████████████| 50/50 [06:29<00:00, 7.80s/it]
Iteration 0: 418.294946s
DONE
```
Support flash attention
Currently supports flash attention to generate correct normal 14B model, 81-frame videos. Flash attention prevents the huge attention weights which cause OOM.
The 1.3B model is not yet ready for flash attention since kv_head = 12 is not divisible across 8 TPUs. Disable flash attention for the VAE for now since kv_head = 1 in the VAE.
Modify flash attention block size to 2048: 528s
multi-host run on v6e-16
- create tpu vm with v6e-16.
- it will create 4 hosts with a 4x4 TPU mesh
- all the command use gcloud to distribute to all workers.
```
Remember to replace variable in placeholder
setup env
export PROJECT_ID=<project_id>
export TPU_NAME=<tpu_name>
export ZONE=
export ACCOUNT=
run() {
  local command=$1
  local worker=${2:-all}
  gcloud compute tpus tpu-vm ssh --zone "${ZONE}" "${ACCOUNT}@${TPU_NAME}" --project "${PROJECT_ID}" --worker=${worker} --command="$command"
}
SETUP_COMMAND="\
set -x && \
sudo apt update && \
sudo apt install -y python3.10-venv && \
python -m venv venv && \
source venv/bin/activate && \
git clone -b ${GITHUB_BRANCH} ${GITHUB_ADDRESS} || true && \
cd diffusers && \
sh -ex setup-dep.sh \
"
Only need run the first time
run "${SETUP_COMMAND}"
RUN_COMMAND="\
set -x && \
source ~/venv/bin/activate && \
killall -9 python || true && \
sleep 10 && \
export JAX_COMPILATION_CACHE_DIR=\"/dev/shm/jax_cache\" && \
export JAX_PERSISTENT_CACHE_MIN_ENTRY_SIZE_BYTES=-1 && \
export JAX_PERSISTENT_CACHE_MIN_COMPILE_TIME_SECS=0 && \
export JAX_PERSISTENT_CACHE_ENABLE_XLA_CACHES='xla_gpu_per_fusion_autotune_cache_dir' && \
export HF_HUB_CACHE=/dev/shm/hf_cache && \
cd diffusers && \
git fetch && git reset --hard origin/${GITHUB_BRANCH} && \
nohup python wan_tx.py > wan_tx.log 2>&1 & \
"
run "${RUN_COMMAND}"
```
ssh into a VM to collect the log in wan_tx.log and the generated video.
Add DP support
v6e-16 needs to use DP to divide head_dim=40.
test using flash attention:
* v6e-8 with dp=2, tp=4:
* 528s -> 490s
* v6e-16 with dp=2, tp=8:
* 358s
With wan_tx_splash_attn: DP is not supported on v6e-8 for now — the VAE will OOM. * v6e-16 with dp=2, tp=8: * 257s
Add SP support
test using flash attention wan_tx:
* v6e-8 with dp=1, tp=4, sp=2:
* 519s
* v6e-8 with dp=2, tp=2, sp=2:
* VAE OOM
* v6e-16 with dp=2, tp=4, sp=2:
* 319s
test with wan_tx_splash_attn: * v6e-16 with dp=2, tp=4, sp=2: * VAE OOM
Modify maxdiffusion to reduce memory usage
To utilize sp with maxdiffusion vae, need to reduce the peak memory usage.
Modification is in https://github.com/yuyanpeng-google/maxdiffusion/tree/wan2.1-dev.
with wan_tx_splash_attn.py * v6e-8 with dp=2, sp=1, tp=4: * 397s * v6e-16 with dp=2, sp=2, tp=4: * 215s
Add VAE sharding
- v6e-8, dp=2, sp=1, tp=4
- python wan_tx_splash_attn.py --use_dp --sp_num=1
- 100%|█████████████| 50/50 [06:06<00:00, 7.32s/it]
- Iteration 0: 376.032197s
- v6e-16, dp=2, sp=2, tp=4
- python wan_tx_splash_attn.py --use_dp --sp_num=2
- 100%|██████████| 50/50 [03:16<00:00, 3.93s/it]
- Iteration 0: 205.504582s
- v6e-32, dp=2, sp=2, tp=8
- python wan_tx_splash_attn.py --use_dp --sp_num=2
- 100%|██████████| 50/50 [02:06<00:00, 2.53s/it]
- Iteration 0: 134.893512s
VAE is consuming about 10s now.
Tune defaults and add bq_size, bkv_size as arguments
- v6e-16, dp=2, sp=2, tp=4
- python wan_tx_splash_attn.py --use_dp --sp_num=2 --bq_size 1512 --bkv_size 1024
- 100%|██████████| 50/50 [03:07<00:00, 3.76s/it]
- Iteration 0: BKVSIZE=1024, BQSIZE=1512: 196.695673s
Adjust sharding using FSDP on sequence and remesh to head on self attn, and still sp on cross attn
This prevents an all-reduce on long sequences. The optimal block size may change; the block sizes have not been swept yet.
- v6e-16, dp=2, sp=1, tp=8
- python wan_tx_splash_attn.py --use_dp --sp_num=1 --bq_size 1512 --bkv_size 1024
- 100%|██████████| 50/50 [02:55<00:00, 3.50s/it]
- Iteration 0 BKVSIZE=1024, BQSIZE=1512: 184.074076s
Sweep best block size and use TP again
Using TP is 1.5s faster than FSDP on v6e-16. Use the --use_fsdp parameter to enable FSDP.
- v6e-16 With best block size:
- 100%|██████████| 50/50 [02:42<00:00, 3.24s/it]
- Iteration 0 BKVCOMPUTESIZE=1024 BKVSIZE=2048, BQSIZE=3024: 171.043314s
Fix not sharding value of attention correctly
python wan_tx_splash_attn.py --use_k_smooth=False
* v6e-16
* 100%|██████████| 50/50 [02:13<00:00, 2.68s/it]
* Iteration 0 BKVCOMPUTESIZE=1024 BKVSIZE=2048, BQSIZE=3024: $${\color{red}143.149646s}$$
Owner
- Login: shungcp
- Kind: user
- Location: Beijing
- Company: Google Cloud
- Website: https://github.com/shungcp
- Repositories: 1
- Profile: https://github.com/shungcp
Citation (CITATION.cff)
cff-version: 1.2.0
title: 'Diffusers: State-of-the-art diffusion models'
message: >-
If you use this software, please cite it using the
metadata from this file.
type: software
authors:
- given-names: Patrick
family-names: von Platen
- given-names: Suraj
family-names: Patil
- given-names: Anton
family-names: Lozhkov
- given-names: Pedro
family-names: Cuenca
- given-names: Nathan
family-names: Lambert
- given-names: Kashif
family-names: Rasul
- given-names: Mishig
family-names: Davaadorj
- given-names: Dhruv
family-names: Nair
- given-names: Sayak
family-names: Paul
- given-names: Steven
family-names: Liu
- given-names: William
family-names: Berman
- given-names: Yiyi
family-names: Xu
- given-names: Thomas
family-names: Wolf
repository-code: 'https://github.com/huggingface/diffusers'
abstract: >-
Diffusers provides pretrained diffusion models across
multiple modalities, such as vision and audio, and serves
as a modular toolbox for inference and training of
diffusion models.
keywords:
- deep-learning
- pytorch
- image-generation
- hacktoberfest
- diffusion
- text2image
- image2image
- score-based-generative-modeling
- stable-diffusion
- stable-diffusion-diffusers
license: Apache-2.0
version: 0.12.1
GitHub Events
Total
- Member event: 2
- Issue comment event: 1
- Push event: 33
- Pull request event: 31
- Fork event: 4
- Create event: 2
Last Year
- Member event: 2
- Issue comment event: 1
- Push event: 33
- Pull request event: 31
- Fork event: 4
- Create event: 2
Dependencies
- actions/cache v2 composite
- actions/checkout v3 composite
- actions/upload-artifact v4 composite
- actions/checkout v3 composite
- docker/build-push-action v3 composite
- docker/login-action v2 composite
- docker/setup-buildx-action v1 composite
- huggingface/hf-workflows/.github/actions/post-slack main composite
- jitterbit/get-changed-files v1 composite
- actions/checkout v3 composite
- actions/setup-python v4 composite
- actions/checkout v3 composite
- actions/upload-artifact v4 composite
- actions/checkout v3 composite
- actions/setup-python v4 composite
- actions/checkout v3 composite
- actions/setup-python v4 composite
- actions/checkout v3 composite
- actions/setup-python v4 composite
- actions/checkout v3 composite
- actions/upload-artifact v3 composite
- actions/upload-artifact v4 composite
- actions/checkout v3 composite
- actions/setup-python v4 composite
- actions/upload-artifact v4 composite
- actions/checkout v3 composite
- actions/setup-python v4 composite
- actions/upload-artifact v4 composite
- actions/checkout v3 composite
- actions/setup-python v4 composite
- actions/checkout v3 composite
- actions/upload-artifact v4 composite
- actions/checkout v3 composite
- actions/upload-artifact v4 composite
- ./.github/actions/setup-miniconda * composite
- actions/checkout v3 composite
- actions/upload-artifact v4 composite
- actions/checkout v3 composite
- actions/setup-python v4 composite
- actions/checkout v3 composite
- actions/upload-artifact v4 composite
- actions/checkout v4 composite
- actions/checkout v3 composite
- huggingface/tailscale-action main composite
- actions/checkout v3 composite
- huggingface/tailscale-action main composite
- actions/checkout v2 composite
- actions/setup-python v1 composite
- actions/checkout v4 composite
- trufflesecurity/trufflehog main composite
- actions/checkout v3 composite
- crate-ci/typos v1.12.4 composite
- actions/checkout v3 composite
- ubuntu 20.04 build
- ubuntu 20.04 build
- ubuntu 20.04 build
- ubuntu 20.04 build
- nvidia/cuda 12.1.0-runtime-ubuntu20.04 build
- nvidia/cuda 12.1.0-runtime-ubuntu20.04 build
- ubuntu 20.04 build
- nvidia/cuda 12.1.0-runtime-ubuntu20.04 build
- nvidia/cuda 12.1.0-runtime-ubuntu20.04 build
- nvidia/cuda 12.1.0-runtime-ubuntu20.04 build
- Jinja2 *
- accelerate >=0.31.0
- ftfy *
- peft >=0.11.1
- sentencepiece *
- tensorboard *
- torchvision *
- transformers >=4.41.2
- Jinja2 *
- accelerate >=0.31.0
- ftfy *
- peft >=0.11.1
- sentencepiece *
- tensorboard *
- torchvision *
- transformers >=4.41.2
- Jinja2 *
- accelerate >=0.31.0
- decord >=0.6.0
- ftfy *
- imageio-ffmpeg *
- peft >=0.11.1
- sentencepiece *
- tensorboard *
- torchvision *
- transformers >=4.41.2
- accelerate ==1.2.0
- peft >=0.14.0
- torch *
- torchvision *
- transformers ==4.47.0
- wandb *
- Jinja2 *
- accelerate >=0.16.0
- ftfy *
- tensorboard *
- torchvision *
- transformers >=4.25.1
- webdataset *
- accelerate >=0.16.0
- datasets *
- ftfy *
- tensorboard *
- torchvision *
- transformers >=4.25.1
- Jinja2 *
- datasets *
- flax *
- ftfy *
- optax *
- tensorboard *
- torch *
- torchvision *
- transformers >=4.25.1
- Jinja2 *
- SentencePiece *
- accelerate >=0.16.0
- datasets *
- ftfy *
- tensorboard *
- torchvision *
- transformers >=4.25.1
- wandb *
- Jinja2 *
- accelerate >=0.16.0
- datasets *
- ftfy *
- tensorboard *
- torchvision *
- transformers >=4.25.1
- wandb *
- Jinja2 *
- accelerate >=0.16.0
- datasets *
- ftfy *
- tensorboard *
- torchvision *
- transformers >=4.25.1
- wandb *
- Jinja2 *
- accelerate *
- ftfy *
- tensorboard *
- torchvision *
- transformers >=4.25.1
- Jinja2 *
- accelerate >=0.16.0
- ftfy *
- peft ==0.7.0
- tensorboard *
- torchvision *
- transformers >=4.25.1
- Jinja2 *
- flax *
- ftfy *
- optax *
- tensorboard *
- torch *
- torchvision *
- transformers >=4.25.1
- Jinja2 *
- accelerate >=0.31.0
- ftfy *
- peft >=0.11.1
- sentencepiece *
- tensorboard *
- torchvision *
- transformers >=4.41.2
- Jinja2 *
- accelerate >=1.0.0
- ftfy *
- peft >=0.14.0
- sentencepiece *
- tensorboard *
- torchvision *
- transformers >=4.47.0
- Jinja2 *
- accelerate >=0.31.0
- ftfy *
- peft ==0.11.1
- sentencepiece *
- tensorboard *
- torchvision *
- transformers >=4.41.2
- Jinja2 *
- accelerate >=0.16.0
- ftfy *
- peft ==0.7.0
- tensorboard *
- torchvision *
- transformers >=4.25.1
- accelerate ==1.2.0
- peft >=0.14.0
- torch *
- torchvision *
- transformers ==4.47.0
- wandb *
- accelerate >=0.16.0
- datasets *
- ftfy *
- tensorboard *
- torchvision *
- transformers >=4.25.1
- Jinja2 *
- accelerate >=0.16.0
- datasets *
- ftfy *
- tensorboard *
- torchvision *
- transformers >=4.25.1
- huggingface-hub >=0.26.2
- Pillow *
- accelerate >=0.16.0
- bitsandbytes *
- datasets *
- huggingface_hub *
- lpips *
- numpy *
- packaging *
- taming_transformers *
- torch *
- torchvision *
- tqdm *
- transformers *
- wandb *
- xformers *
- Jinja2 *
- diffusers *
- ftfy *
- tensorboard *
- torch *
- torchvision *
- transformers *
- Jinja2 *
- accelerate >=0.16.0
- ftfy *
- tensorboard *
- torchvision *
- transformers >=4.25.1
- Jinja2 *
- accelerate >=0.16.0
- ftfy *
- peft *
- tensorboard *
- torchvision *
- transformers >=4.25.1
- wandb *
- accelerate *
- datasets *
- peft *
- torchvision *
- transformers *
- wandb *
- webdataset *
- Jinja2 *
- accelerate >=0.16.0
- diffusers ==0.9.0
- ftfy *
- tensorboard *
- torchvision *
- transformers >=4.21.0
- Jinja2 *
- accelerate >=0.16.0
- diffusers *
- fairscale *
- ftfy *
- scipy *
- tensorboard *
- timm *
- torchvision *
- transformers >=4.25.1
- wandb *
- Jinja2 *
- accelerate >=0.16.0
- ftfy *
- intel_extension_for_pytorch >=1.13
- tensorboard *
- torchvision *
- transformers >=4.21.0
- accelerate *
- ftfy *
- modelcards *
- neural-compressor *
- tensorboard *
- torchvision *
- transformers >=4.25.0
- accelerate *
- ip_adapter *
- torchvision *
- transformers >=4.25.1
- Jinja2 *
- accelerate >=0.16.0
- datasets *
- ftfy *
- tensorboard *
- torchvision *
- transformers >=4.25.1
- Jinja2 *
- accelerate >=0.16.0
- ftfy *
- tensorboard *
- torchvision *
- transformers >=4.25.1
- Jinja2 *
- accelerate >=0.16.0
- datasets >=2.16.0
- ftfy *
- tensorboard *
- torchvision *
- transformers >=4.25.1
- wandb >=0.16.1
- Jinja2 *
- accelerate >=0.16.0
- ftfy *
- tensorboard *
- torchvision *
- transformers >=4.25.1
- Jinja2 *
- flax *
- ftfy *
- optax *
- tensorboard *
- torch *
- torchvision *
- transformers >=4.25.1
- accelerate >=0.16.0
- datasets *
- ftfy *
- modelcards *
- tensorboard *
- torchvision *
- transformers >=4.25.1
- accelerate >=0.16.0
- ftfy *
- modelcards *
- tensorboard *
- torchvision *
- transformers >=4.25.1
- accelerate >=0.16.0
- datasets *
- tensorboard *
- torchvision *
- SentencePiece *
- controlnet-aux *
- datasets *
- torchvision *
- transformers *
- Jinja2 *
- accelerate >=0.16.0
- datasets >=2.19.1
- ftfy *
- peft ==0.7.0
- tensorboard *
- torchvision *
- transformers >=4.25.1
- Jinja2 ==3.1.6
- accelerate ==0.23.0
- diffusers ==0.20.1
- ftfy ==6.1.1
- peft ==0.5.0
- tensorboard ==2.14.0
- torch ==2.2.0
- torchvision >=0.16
- transformers ==4.38.0
- accelerate >=0.16.0
- bitsandbytes *
- deepspeed *
- peft >=0.6.0
- torchvision *
- transformers >=4.25.1
- wandb *
- aiohttp *
- fastapi *
- prometheus-fastapi-instrumentator >=7.0.0
- prometheus_client >=0.18.0
- py-consul *
- sentencepiece *
- torch *
- transformers ==4.46.1
- uvicorn *
- aiohappyeyeballs ==2.4.3
- aiohttp ==3.10.10
- aiosignal ==1.3.1
- annotated-types ==0.7.0
- anyio ==4.6.2.post1
- attrs ==24.2.0
- certifi ==2024.8.30
- charset-normalizer ==3.4.0
- click ==8.1.7
- fastapi ==0.115.3
- filelock ==3.16.1
- frozenlist ==1.5.0
- fsspec ==2024.10.0
- h11 ==0.14.0
- huggingface-hub ==0.26.1
- idna ==3.10
- jinja2 ==3.1.4
- markupsafe ==3.0.2
- mpmath ==1.3.0
- multidict ==6.1.0
- networkx ==3.4.2
- numpy ==2.1.2
- packaging ==24.1
- prometheus-client ==0.21.0
- prometheus-fastapi-instrumentator ==7.0.0
- propcache ==0.2.0
- py-consul ==1.5.3
- pydantic ==2.9.2
- pydantic-core ==2.23.4
- pyyaml ==6.0.2
- regex ==2024.9.11
- requests ==2.32.3
- safetensors ==0.4.5
- sentencepiece ==0.2.0
- sniffio ==1.3.1
- starlette ==0.41.0
- sympy ==1.13.3
- tokenizers ==0.20.1
- torch ==2.4.1
- tqdm ==4.66.5
- transformers ==4.46.1
- typing-extensions ==4.12.2
- urllib3 ==2.2.3
- uvicorn ==0.32.0
- yarl ==1.16.0
- accelerate >=0.16.0
- datasets *
- ftfy *
- safetensors *
- tensorboard *
- torchvision *
- transformers >=4.25.1
- wandb *
- Jinja2 *
- accelerate >=0.16.0
- datasets >=2.19.1
- ftfy *
- peft ==0.7.0
- tensorboard *
- torchvision *
- transformers >=4.25.1
- Jinja2 *
- datasets *
- flax *
- ftfy *
- optax *
- tensorboard *
- torch *
- torchvision *
- transformers >=4.25.1
- Jinja2 *
- accelerate >=0.22.0
- datasets *
- ftfy *
- peft ==0.7.0
- tensorboard *
- torchvision *
- transformers >=4.25.1
- Jinja2 *
- accelerate >=0.16.0
- ftfy *
- tensorboard *
- torchvision *
- transformers >=4.25.1
- Jinja2 *
- flax *
- ftfy *
- optax *
- tensorboard *
- torch *
- torchvision *
- transformers >=4.25.1
- accelerate >=0.16.0
- datasets *
- torchvision *
- accelerate >=0.16.0
- datasets *
- numpy *
- tensorboard *
- timm *
- torchvision *
- tqdm *
- transformers >=4.25.1
- deps *