Skip to content

Instantly share code, notes, and snippets.

Loading pipeline components...: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:03<00:00, 1.42it/s]
Compiling transformer
Pipeline created and compiled
Initial compilation/warmup for each resolution
Warming up at 512x512
0%| | 0/25 [00:00<?, ?it/s]INFO:torch._inductor.cudagraph_trees.__cudagraphs:recording cudagraph tree for symint key (2, 4, 64, 2, 9216, 256, 0, 1000, 256, 2, 4)
DEBUG:torch._inductor.cudagraph_trees.__cudagraphs:Running warmup of function 0
4%|█████▍ | 1/25 [05:49<2:19:45, 349.40s/it]DEBUG:torch._inductor.cudagraph_trees.__cudagraphs:Recording function 0 of graph recording id 0
100%|█████████████████████████████████████████████████████████
Loading pipeline components...: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 9.31it/s]
Compiling transformer
Pipeline created and compiled
Initial compilation/warmup for each resolution
Warming up at 512x512
0%| | 0/25 [00:00<?, ?it/s]INFO:torch._inductor.cudagraph_trees.__cudagraphs:recording cudagraph tree for symint key (2, 4, 64, 2, 9216, 256, 0, 1000, 256, 2, 4)
DEBUG:torch._inductor.cudagraph_trees.__cudagraphs:Running warmup of function 0
4%|█████▍ | 1/25 [04:08<1:39:30, 248.79s/it]DEBUG:torch._inductor.cudagraph_trees.__cudagraphs:Recording function 0 of graph recording id 0
100%|█████████████████████████████████████████████████████████
Loading pipeline components...: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:03<00:00, 1.42it/s]
Compiling transformer
Pipeline created and compiled
Initial compilation/warmup for each resolution
Warming up at 512x512
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [04:17<00:00, 10.31s/it]
Warming up at 1024x1024
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [00:05<00:00, 4.87it/s]
Warming up at 1280x1024
def count_consecutive_patterns(filename):
    """Count runs of consecutive collective-op call lines in a graph dump.

    Scans *filename* for printed FX-graph lines containing
    `torch.ops._c10d_functional.all_reduce(` or `all_to_all_single(`.

    NOTE(review): this definition is truncated in the paste — only its
    state setup is visible; the scanning loop and return are missing.
    """
    # The leading `" = ` anchors matches to the assignment form of a
    # call line in the printed graph, not to arbitrary mentions.
    all_reduce_pattern = "\" = torch.ops._c10d_functional.all_reduce("
    all_to_all_pattern = "\" = torch.ops._c10d_functional.all_to_all_single("
    current_streak = 0  # length of the run currently being tracked
    current_pattern = None  # which of the two patterns the current run matches
    current_start_line = None  # line where the current run began — presumably 1-based; verify
    results = []  # completed runs — contents set by the missing loop; TODO confirm shape
    line_number = 0
    pattern_matches = []  # Store all pattern matches with line numbers
rank29.txt rank7.txt
------------------------------------------------------------------------------------------------------------------------
all_reduce appeared 376 times in a row (starting line 3165) all_reduce appeared 376 times in a row (starting line 3059)
all_to_all appeared 1 times in a row (starting line 14578) all_to_all appeared 1 times in a row (starting line 14472)
all_reduce appeared 23 times in a row (starting line 14665) all_reduce appeared 23 times in a row (starting line 14558)
all_to_all appeared 1 times in a row (starting line 15689) all_to_all appeared 1 times in a row (starting line 15582)
all_reduce appeared 23 times in a row (starting line 15776) all_reduce appeared 23 times in a row (starting line 15668)
all_to_all appeared 1 times in a row (starting line 16801) all_to_all appeared 1 times in a row (starting line 16693)
all_reduce appeared 23 times in a row (starting l
# Minimal multi-rank torch.compile repro: one process per rank over NCCL.
import torch
import torch.distributed as dist
# Allow ranks to exchange compile-time information so they converge on
# consistent compiled graphs across the process group.
torch._dynamo.config.enable_compiler_collectives = True
dist.init_process_group(backend="nccl")
rank = dist.get_rank()
torch.cuda.set_device(rank)  # bind this process to its rank's GPU
# NOTE(review): the function this decorator applies to is missing from the paste.
@torch.compile(backend="eager", fullgraph=True)
Traceback (most recent call last):
File "/home/xmfan/empathy/coqui-ai-TTS/xtts2.py", line 23, in <module>
warmup()
File "/home/xmfan/.conda/envs/empathy310/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 465, in _fn
return fn(*args, **kwargs)
File "/home/xmfan/empathy/coqui-ai-TTS/xtts2.py", line 19, in warmup
fn()
File "/home/xmfan/empathy/coqui-ai-TTS/xtts2.py", line 13, in fn
tts.tts(text="Hello from XTTS2. I am being tested for the torch.compile User Empathy Day on Nov 20th 2024.", speaker_wav="en_sample.wav", language="en")
File "/home/xmfan/empathy/coqui-ai-TTS/TTS/api.py", line 276, in tts
@xmfan
xmfan / fusedbwdoptim.py
Created November 19, 2024 18:19
Fused backward + Simple optimizer implementation
import torch
import torch.nn as nn
# Trace the backward pass with compiled autograd, so that
# `loss.backward()` inside the compiled function is captured rather
# than run eagerly.
torch._dynamo.config.compiled_autograd = True
@torch.compile
def train(model, x):
    """Run one fused forward+backward step on *model* with input *x*.

    Computes a scalar loss as the sum of the model output, backprops it,
    then clears all gradients.
    """
    loss = model(x).sum()
    loss.backward()
    # Drop grads by setting them to None (cheaper than zeroing) —
    # presumably the "simple optimizer" step named in the gist title
    # applies updates before this; TODO confirm against the full file.
    for param in model.parameters():
        param.grad = None
# Setup fragment for a distributed compiled-autograd repro.
import os
import functools
import torch
import torch.nn as nn
import torch.distributed as dist
from torch._dynamo import compiled_autograd
from torch.distributed.distributed_c10d import _get_default_group
# File-based rendezvous path — presumably passed to init_process_group
# further down; the usage is not visible in this paste.
FILE_PATH = "/tmp/chienchin_rendezvous"
DIM=2000  # model/layer width used by the (unseen) module definition — TODO confirm
# AOT ID: ['1_inference']
from ctypes import c_void_p, c_long, c_int
import torch
import math
import random
import os
import tempfile
from math import inf, nan
from torch._inductor.hooks import run_intermediate_hooks
from torch._inductor.utils import maybe_profile