Packaging IGUANE (#4)
* Make iguane script packageable

* Create module iguane
Move iguane script into iguane/__main__.py
Add a test to check RGUs

* Move RAWDATA into a TOML file and import it.

* Move FOM functions into dedicated module `fom`

* Update pyproject

* Update README

* Add comments
Re-organise imports in test

* Use `read_text()` to import resource
notoraptor authored Nov 5, 2024
1 parent a4fd850 commit 96f9476
Showing 12 changed files with 425 additions and 106 deletions.
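
The commit message mentions a new test checking RGUs. The test file itself is not among the files shown in this excerpt, but a minimal sketch of such a check (hypothetical name and assertion, not the actual file) could look like:

```python
# Hypothetical sketch -- the real test file is not shown in this excerpt.
import pytest
from iguane.fom import FOM, UGR_VERSIONS

def test_ugr_at_reference_gpu():
    # At the reference A100-SXM4-40GB every spec ratio is 1, so the FoM
    # reduces to the sum of the version's weights: 1.6 + 1.6 + 0.8 = 4.0.
    assert FOM['ugr']('A100-SXM4-40GB') == pytest.approx(4.0)
```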
2 changes: 1 addition & 1 deletion .gitignore
@@ -159,4 +159,4 @@ cython_debug/
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
.idea/
10 changes: 5 additions & 5 deletions README.md
@@ -6,7 +6,7 @@ Supported FoMs:

*Examples*:

$ ./iguane.py -sr
$ python -m iguane -sr
2.59 H100-SXM5-80GB
2.43 H100-NVL-94GB
1.96 H100-PCIe-80GB
@@ -26,7 +26,7 @@ Supported FoMs:
0.25 P100-PCIe-16GB
0.22 P100-PCIe-12GB
0.21 T4
$ ./iguane.py -pd": "
$ python -m iguane -pd": "
P100-PCIe-12GB: 0.22040341999886826
P100-PCIe-16GB: 0.2539403974908297
P100-SXM2-16GB: 0.2702130531251874
@@ -46,7 +46,7 @@ Supported FoMs:
H100-PCIe-80GB: 1.9579954847095848
H100-SXM5-80GB: 2.586680957484816
H100-NVL-94GB: 2.4280983913222136
$ ./iguane.py -jsru rgu
$ python -m iguane -jsru rgu
{
"H100-SXM5-80GB": 12.168321513002363,
"H100-NVL-94GB": 11.091546985815604,
@@ -68,9 +68,9 @@ Supported FoMs:
"P100-PCIe-16GB": 1.182505910165485,
"P100-PCIe-12GB": 1.1025059101654848
}
$ ./iguane.py -i mila.json
$ python -m iguane -i mila.json
5886.69
$ ./iguane.py -sji mila.json
$ python -m iguane -sji mila.json
{
"breakdown": {
"A6000": 39.453049645390074,
Empty file added iguane/__init__.py
Empty file.
107 changes: 7 additions & 100 deletions iguane.py → iguane/__main__.py
@@ -1,10 +1,10 @@
#!/usr/bin/env python3
import argparse
import datetime
import glob, re
import json
import sys
import pathlib
from iguane.fom import UGR_VERSIONS, RAWDATA, FOM


def matchgpu(name, pat):
@@ -13,12 +13,6 @@ def matchgpu(name, pat):
if not re.match('[*?\\[\\]]', pat) and name.startswith(pat):
return True
return glob.fnmatch.fnmatchcase(name.lower(), pat.lower())
def fom(f):
globals().setdefault('FOM', {})
assert f.__name__.startswith('fom_') and f.__name__ != 'fom_', \
'Figure-of-Merit function names must start with "fom_"!'
FOM[f.__name__[4:]] = f
return f


#
@@ -193,93 +187,6 @@ def fom(f):
# (There is no agreement anywhere online on the exact NVL specs. This is just for fun.)
#

RAWDATA = { # TFLOPS TFLOPS TFLOPS TFLOPS GB GB/s W Release Date
'K80': dict(fp16= None, fp32= 4.368000, fp64= 1.456000, tf32= None, memgb= 12, membw= 240, tdp=150, reldate="2014-11-17"),
'M40': dict(fp16= None, fp32= 6.844416, fp64= 0.213888, tf32= None, memgb= 12, membw= 288, tdp=250, reldate="2015-11-10"),
'P100-PCIe-12GB': dict(fp16= 18.679808, fp32= 9.339904, fp64= 4.669952, tf32= None, memgb= 12, membw= 549, tdp=250, reldate="2016-06-20"),
'P100-PCIe-16GB': dict(fp16= 18.679808, fp32= 9.339904, fp64= 4.669952, tf32= None, memgb= 16, membw= 732, tdp=250, reldate="2016-06-20"),
'P100-SXM2-16GB': dict(fp16= 21.217280, fp32=10.608640, fp64= 5.304320, tf32= None, memgb= 16, membw= 732, tdp=300, reldate="2016-04-05"),
'V100-PCIe-16GB': dict(fp16=112.230400, fp32=14.028800, fp64= 7.014400, tf32= None, memgb= 16, membw= 900, tdp=250, reldate="2017-06-21"),
'V100-PCIe-32GB': dict(fp16=112.230400, fp32=14.028800, fp64= 7.014400, tf32= None, memgb= 32, membw= 900, tdp=250, reldate="2018-03-27"),
'V100-SXM2-16GB': dict(fp16=125.337600, fp32=15.667200, fp64= 7.833600, tf32= None, memgb= 16, membw= 900, tdp=300, reldate="2017-05-10"),
'V100-SXM2-32GB': dict(fp16=125.337600, fp32=15.667200, fp64= 7.833600, tf32= None, memgb= 32, membw= 900, tdp=300, reldate="2018-03-27"),
'V100S-PCIe-32GB': dict(fp16=130.826240, fp32=16.353280, fp64= 8.176640, tf32= None, memgb= 32, membw=1134, tdp=250, reldate="2019-11-26"),
'RTX-2080': dict(fp16= 80.547840, fp32=10.068480, fp64= 0.314640, tf32= None, memgb= 8, membw= 448, tdp=215, reldate="2018-09-20"),
'RTX-2080-Super': dict(fp16= 89.210880, fp32=11.151360, fp64= 0.348480, tf32= None, memgb= 8, membw= 496, tdp=250, reldate="2019-07-23"),
'RTX-2080-Ti': dict(fp16=107.581440, fp32=13.447680, fp64= 0.420240, tf32= None, memgb= 11, membw= 616, tdp=250, reldate="2018-09-27"),
'TITAN-RTX': dict(fp16=130.498560, fp32=16.312320, fp64= 0.509760, tf32= None, memgb= 24, membw= 672, tdp=280, reldate="2018-12-18"),
'T4': dict(fp16= 65.126400, fp32= 8.140800, fp64= 0.254400, tf32= None, memgb= 16, membw= 300, tdp= 70, reldate="2018-09-12"),
'RTX8000': dict(fp16=130.498560, fp32=16.312320, fp64= 0.509760, tf32= None, memgb= 48, membw= 672, tdp=260, reldate="2018-08-13"),
'A100-PCIe-40GB': dict(fp16=311.869440, fp32=19.491840, fp64= 9.745920, tf32=155.934720, memgb= 40, membw=1555, tdp=250, reldate="2020-05-14"),
'A100-PCIe-80GB': dict(fp16=311.869440, fp32=19.491840, fp64= 9.745920, tf32=155.934720, memgb= 80, membw=1555, tdp=250, reldate="2021-06-28"),
'A100-SXM4-40GB': dict(fp16=311.869440, fp32=19.491840, fp64= 9.745920, tf32=155.934720, memgb= 40, membw=1555, tdp=400, reldate="2020-05-14"),
'A100-SXM4-80GB': dict(fp16=311.869440, fp32=19.491840, fp64= 9.745920, tf32=155.934720, memgb= 80, membw=1555, tdp=400, reldate="2020-11-16"),
'A5000': dict(fp16=111.083520, fp32=27.770880, fp64= 0.433920, tf32= 55.541760, memgb= 24, membw= 768, tdp=230, reldate="2021-04-12"),
'A6000': dict(fp16=154.828800, fp32=38.707200, fp64= 0.604800, tf32= 77.414400, memgb= 48, membw= 768, tdp=300, reldate="2020-10-05"),
'RTX-4080': dict(fp16=194.949120, fp32=48.737280, fp64= 0.761520, tf32= 97.474560, memgb= 16, membw= 717, tdp=320, reldate="2022-11-16"),
'RTX-4080-Super': dict(fp16=208.896000, fp32=52.224000, fp64= 0.816000, tf32=104.448000, memgb= 16, membw= 736, tdp=425, reldate="2024-01-31"),
'RTX-4090-Laptop': dict(fp16=200.785920, fp32=50.196480, fp64= 0.784320, tf32=100.392960, memgb= 16, membw= 576, tdp=150, reldate="2023-02-08"),
'RTX-4090': dict(fp16=330.301440, fp32=82.575360, fp64= 1.290240, tf32=165.150720, memgb= 24, membw=1008, tdp=450, reldate="2022-10-12"),
'L40S': dict(fp16=366.428160, fp32=91.607040, fp64= 1.431360, tf32=183.214080, memgb= 48, membw= 864, tdp=350, reldate="2023-08-08"),
'H100-PCIe-80GB': dict(fp16=756.449280, fp32=51.217920, fp64=25.608960, tf32=378.224640, memgb= 80, membw=2039, tdp=350, reldate="2022-03-22"),
'H100-SXM5-80GB': dict(fp16=989.429760, fp32=66.908160, fp64=33.454080, tf32=494.714880, memgb= 80, membw=3352, tdp=700, reldate="2022-03-22"),
'H100-NVL-94GB': dict(fp16=835.500000, fp32=60.000000, fp64=30.000000, tf32=417.250000, memgb= 94, membw=3938, tdp=400, reldate="2023-03-21"),
}


UGR_VERSIONS = {
# v1.0 is equivalent to UGR
# https://docs.alliancecan.ca/wiki/Allocations_and_compute_scheduling
"1.0": { 'fp16': 1.6, 'fp32': 1.6, 'memgb': 0.8 },
"1.0-renorm": { 'fp16': 0.4, 'fp32': 0.4, 'memgb': 0.2 },
}


@fom
def fom_count(name, *, args=None):
return 1
@fom
def fom_fp16(name, *, args=None):
data = RAWDATA[name]
return data['fp16'] or data['fp32']
@fom
def fom_fp32(name, *, args=None):
return RAWDATA[name]['fp32']
@fom
def fom_fp64(name, *, args=None):
return RAWDATA[name]['fp64']
@fom
def fom_tf32(name, *, args=None):
data = RAWDATA[name]
return data['tf32'] or data['fp32']

@fom
def fom_ugr(name, *, args=None):
weights = UGR_VERSIONS[args.ugr_version if args else "1.0"]
ref = RAWDATA['A100-SXM4-40GB']
data = RAWDATA[name].copy()
data['tf32'] = data['tf32'] or data['fp32']
data['fp16'] = data['fp16'] or data['fp32']
return sum([w * (data[k] / ref[k]) for k, w in weights.items()])

@fom
def fom_iguane(name, *, args=None):
ref = RAWDATA['A100-SXM4-80GB']
data = RAWDATA[name].copy()
data['tf32'] = data['tf32'] or data['fp32']
data['fp16'] = data['fp16'] or data['fp32']
weight_fp16 = 0.2
weight_fp32 = 0.2
weight_tf32 = 0.2
weight_memgb = 0.2
weight_membw = 0.2
return weight_fp16 * (data['fp16'] / ref['fp16']) + \
weight_fp32 * (data['fp32'] / ref['fp32']) + \
weight_tf32 * (data['tf32'] / ref['tf32']) + \
weight_memgb * (data['memgb'] / ref['memgb']) + \
weight_membw * (data['membw'] / ref['membw'])



if __name__ == "__main__":
argp = argparse.ArgumentParser()
@@ -300,7 +207,7 @@ def fom_iguane(name, *, args=None):
help="Input JSON file")
argp.add_argument('--gpu', '-G', type=str, default=None,
help="Selected GPU")

ogrp = argp.add_argument_group('OUTPUT FORMAT', 'Output format control')
ogrp.add_argument('--delimiter', '-d', type=str, default=',',
help="Delimiter for --parsable output")
@@ -309,7 +216,7 @@ def fom_iguane(name, *, args=None):
help="Dump output as JSON")
omtx.add_argument('--parsable', '-p', action='store_true',
help="Dump output as delimited text")

ugrp = argp.add_argument_group('UNITS', 'Unit/FoM selection')
umtx = ugrp.add_mutually_exclusive_group()
umtx.add_argument('--unit', '-u', '--fom', type=str.lower, default='ugr',
@@ -326,12 +233,12 @@ def fom_iguane(name, *, args=None):
ugrp.add_argument('--ugr-version', type=str, default='1.0',
choices=UGR_VERSIONS.keys(),
help="Select UGR ponderation version")

# Parse Arguments
args = argp.parse_args(sys.argv[1:])
args.unit = {'iguana': 'iguane', 'rgu': 'ugr'}.get(args.unit, args.unit)


if args.list_units:
units = list(FOM.keys())
if args.reverse:
@@ -410,5 +317,5 @@ def fom_iguane(name, *, args=None):
print(f"{k}{args.delimiter}{DATA[k]}")
else:
print(f"{DATA[k]:-5.2f} {k}")

sys.exit(0)
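
With the script now packaged as a module, the FoM registry can also be used programmatically instead of only through the CLI. A minimal sketch, assuming the package is importable (the helper below is illustrative, not part of the commit):

```python
from iguane.fom import FOM, RAWDATA

def rank_gpus(unit='iguane'):
    """Score every GPU in RAWDATA with the chosen FoM, best first."""
    fom = FOM[unit]
    return sorted(((fom(name), name) for name in RAWDATA), reverse=True)

for score, name in rank_gpus()[:3]:
    print(f'{score:5.2f} {name}')
```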
84 changes: 84 additions & 0 deletions iguane/fom.py
@@ -0,0 +1,84 @@
import importlib.resources
import json
import re
try:
import tomllib
except ModuleNotFoundError:
# for Python before 3.11
import pip._vendor.tomli as tomllib


# Import RAWDATA (a dictionary mapping each GPU name to its specs: TFLOPS, memory, bandwidth, TDP, release date) from a resource file
RAWDATA = tomllib.loads(importlib.resources.read_text("iguane", "rawdata.toml"))


UGR_VERSIONS = {
# v1.0 is equivalent to UGR
# https://docs.alliancecan.ca/wiki/Allocations_and_compute_scheduling
"1.0": { 'fp16': 1.6, 'fp32': 1.6, 'memgb': 0.8 },
"1.0-renorm": { 'fp16': 0.4, 'fp32': 0.4, 'memgb': 0.2 },
}


def fom(f):
globals().setdefault('FOM', {})
assert f.__name__.startswith('fom_') and f.__name__ != 'fom_', \
'Figure-of-Merit function names must start with "fom_"!'
FOM[f.__name__[4:]] = f
return f



@fom
def fom_count(name, *, args=None):
return 1


@fom
def fom_fp16(name, *, args=None):
data = RAWDATA[name]
return data['fp16'] or data['fp32']


@fom
def fom_fp32(name, *, args=None):
return RAWDATA[name]['fp32']


@fom
def fom_fp64(name, *, args=None):
return RAWDATA[name]['fp64']


@fom
def fom_tf32(name, *, args=None):
data = RAWDATA[name]
return data['tf32'] or data['fp32']


@fom
def fom_ugr(name, *, args=None):
weights = UGR_VERSIONS[args.ugr_version if args else "1.0"]
ref = RAWDATA['A100-SXM4-40GB']
data = RAWDATA[name].copy()
data['tf32'] = data['tf32'] or data['fp32']
data['fp16'] = data['fp16'] or data['fp32']
return sum([w * (data[k] / ref[k]) for k, w in weights.items()])


@fom
def fom_iguane(name, *, args=None):
ref = RAWDATA['A100-SXM4-80GB']
data = RAWDATA[name].copy()
data['tf32'] = data['tf32'] or data['fp32']
data['fp16'] = data['fp16'] or data['fp32']
weight_fp16 = 0.2
weight_fp32 = 0.2
weight_tf32 = 0.2
weight_memgb = 0.2
weight_membw = 0.2
return weight_fp16 * (data['fp16'] / ref['fp16']) + \
weight_fp32 * (data['fp32'] / ref['fp32']) + \
weight_tf32 * (data['tf32'] / ref['tf32']) + \
weight_memgb * (data['memgb'] / ref['memgb']) + \
weight_membw * (data['membw'] / ref['membw'])
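
Because `fom` doubles as a registry, any function named `fom_<unit>` becomes selectable as unit `<unit>`, so downstream code can register extra figures of merit without touching this module. A hedged sketch (the FoM below is hypothetical, not part of this commit):

```python
from iguane.fom import FOM, RAWDATA, fom

@fom
def fom_fp16_per_watt(name, *, args=None):
    # Hypothetical FoM: fp16 TFLOPS (falling back to fp32) per watt of TDP.
    data = RAWDATA[name]
    return (data['fp16'] or data['fp32']) / data['tdp']

print(FOM['fp16_per_watt']('H100-SXM5-80GB'))  # 989.42976 / 700, about 1.41
```

Note that `fom_iguane` itself is just the equal-weights case: five ratios at 0.2 each, so the reference A100-SXM4-80GB scores exactly 1.0.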
31 changes: 31 additions & 0 deletions iguane/rawdata.toml
@@ -0,0 +1,31 @@
# TFLOPS TFLOPS TFLOPS TFLOPS GB GB/s W Release Date
K80 = {fp16= false, fp32= 4.368000, fp64= 1.456000, tf32= false, memgb= 12, membw= 240, tdp=150, reldate="2014-11-17"}
M40 = {fp16= false, fp32= 6.844416, fp64= 0.213888, tf32= false, memgb= 12, membw= 288, tdp=250, reldate="2015-11-10"}
P100-PCIe-12GB = {fp16= 18.679808, fp32= 9.339904, fp64= 4.669952, tf32= false, memgb= 12, membw= 549, tdp=250, reldate="2016-06-20"}
P100-PCIe-16GB = {fp16= 18.679808, fp32= 9.339904, fp64= 4.669952, tf32= false, memgb= 16, membw= 732, tdp=250, reldate="2016-06-20"}
P100-SXM2-16GB = {fp16= 21.217280, fp32=10.608640, fp64= 5.304320, tf32= false, memgb= 16, membw= 732, tdp=300, reldate="2016-04-05"}
V100-PCIe-16GB = {fp16=112.230400, fp32=14.028800, fp64= 7.014400, tf32= false, memgb= 16, membw= 900, tdp=250, reldate="2017-06-21"}
V100-PCIe-32GB = {fp16=112.230400, fp32=14.028800, fp64= 7.014400, tf32= false, memgb= 32, membw= 900, tdp=250, reldate="2018-03-27"}
V100-SXM2-16GB = {fp16=125.337600, fp32=15.667200, fp64= 7.833600, tf32= false, memgb= 16, membw= 900, tdp=300, reldate="2017-05-10"}
V100-SXM2-32GB = {fp16=125.337600, fp32=15.667200, fp64= 7.833600, tf32= false, memgb= 32, membw= 900, tdp=300, reldate="2018-03-27"}
V100S-PCIe-32GB = {fp16=130.826240, fp32=16.353280, fp64= 8.176640, tf32= false, memgb= 32, membw=1134, tdp=250, reldate="2019-11-26"}
RTX-2080 = {fp16= 80.547840, fp32=10.068480, fp64= 0.314640, tf32= false, memgb= 8, membw= 448, tdp=215, reldate="2018-09-20"}
RTX-2080-Super = {fp16= 89.210880, fp32=11.151360, fp64= 0.348480, tf32= false, memgb= 8, membw= 496, tdp=250, reldate="2019-07-23"}
RTX-2080-Ti = {fp16=107.581440, fp32=13.447680, fp64= 0.420240, tf32= false, memgb= 11, membw= 616, tdp=250, reldate="2018-09-27"}
TITAN-RTX = {fp16=130.498560, fp32=16.312320, fp64= 0.509760, tf32= false, memgb= 24, membw= 672, tdp=280, reldate="2018-12-18"}
T4 = {fp16= 65.126400, fp32= 8.140800, fp64= 0.254400, tf32= false, memgb= 16, membw= 300, tdp= 70, reldate="2018-09-12"}
RTX8000 = {fp16=130.498560, fp32=16.312320, fp64= 0.509760, tf32= false, memgb= 48, membw= 672, tdp=260, reldate="2018-08-13"}
A100-PCIe-40GB = {fp16=311.869440, fp32=19.491840, fp64= 9.745920, tf32=155.934720, memgb= 40, membw=1555, tdp=250, reldate="2020-05-14"}
A100-PCIe-80GB = {fp16=311.869440, fp32=19.491840, fp64= 9.745920, tf32=155.934720, memgb= 80, membw=1555, tdp=250, reldate="2021-06-28"}
A100-SXM4-40GB = {fp16=311.869440, fp32=19.491840, fp64= 9.745920, tf32=155.934720, memgb= 40, membw=1555, tdp=400, reldate="2020-05-14"}
A100-SXM4-80GB = {fp16=311.869440, fp32=19.491840, fp64= 9.745920, tf32=155.934720, memgb= 80, membw=1555, tdp=400, reldate="2020-11-16"}
A5000 = {fp16=111.083520, fp32=27.770880, fp64= 0.433920, tf32= 55.541760, memgb= 24, membw= 768, tdp=230, reldate="2021-04-12"}
A6000 = {fp16=154.828800, fp32=38.707200, fp64= 0.604800, tf32= 77.414400, memgb= 48, membw= 768, tdp=300, reldate="2020-10-05"}
RTX-4080 = {fp16=194.949120, fp32=48.737280, fp64= 0.761520, tf32= 97.474560, memgb= 16, membw= 717, tdp=320, reldate="2022-11-16"}
RTX-4080-Super = {fp16=208.896000, fp32=52.224000, fp64= 0.816000, tf32=104.448000, memgb= 16, membw= 736, tdp=425, reldate="2024-01-31"}
RTX-4090-Laptop = {fp16=200.785920, fp32=50.196480, fp64= 0.784320, tf32=100.392960, memgb= 16, membw= 576, tdp=150, reldate="2023-02-08"}
RTX-4090 = {fp16=330.301440, fp32=82.575360, fp64= 1.290240, tf32=165.150720, memgb= 24, membw=1008, tdp=450, reldate="2022-10-12"}
L40S = {fp16=366.428160, fp32=91.607040, fp64= 1.431360, tf32=183.214080, memgb= 48, membw= 864, tdp=350, reldate="2023-08-08"}
H100-PCIe-80GB = {fp16=756.449280, fp32=51.217920, fp64=25.608960, tf32=378.224640, memgb= 80, membw=2039, tdp=350, reldate="2022-03-22"}
H100-SXM5-80GB = {fp16=989.429760, fp32=66.908160, fp64=33.454080, tf32=494.714880, memgb= 80, membw=3352, tdp=700, reldate="2022-03-22"}
H100-NVL-94GB = {fp16=835.500000, fp32=60.000000, fp64=30.000000, tf32=417.250000, memgb= 94, membw=3938, tdp=400, reldate="2023-03-21"}
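
One subtlety of the TOML migration: TOML has no null value, so the `None` entries from the old Python dict are stored as `false`. The `data['fp16'] or data['fp32']` fallback in `fom.py` treats `False` and `None` alike, so the FoM functions are unaffected. The empty `iguane/__init__.py` added above makes the directory a regular package so the resource loader can find this file. A quick sketch of loading it (the commit itself uses `importlib.resources.read_text()`; `files()` shown here is an equivalent alternative, and on Pythons before 3.11 the `tomli` backport is the usual substitute for the `pip._vendor.tomli` fallback in `fom.py`):

```python
import importlib.resources
import tomllib  # Python 3.11+; on older versions use the tomli backport

RAWDATA = tomllib.loads(
    importlib.resources.files('iguane').joinpath('rawdata.toml').read_text()
)

k80 = RAWDATA['K80']
assert k80['fp16'] is False        # TOML has no null; false stands in for None
print(k80['fp16'] or k80['fp32'])  # falls back to fp32: 4.368
```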