Packaging IGUANE (#4)
* Make iguane script packageable

* Create module iguane
Move iguane script into iguane/__main__.py
Add a test to check RGUs

* Move RAWDATA into a TOML file and import it.

* Move FOM functions into dedicated module `fom`

* Update pyproject

* Update README

* Add comments
Re-organise imports in test

* Use `read_text()` to import resource
notoraptor authored Nov 5, 2024
1 parent a4fd850 commit 96f9476
Showing 12 changed files with 425 additions and 106 deletions.
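
The commit message mentions a new test checking RGUs. The test file itself is not among the files shown in this excerpt, but a minimal sketch of such a check (hypothetical name and assertion, not the actual file) could look like:

```python
# Hypothetical sketch -- the real test file is not shown in this excerpt.
import pytest
from iguane.fom import FOM, UGR_VERSIONS

def test_ugr_at_reference_gpu():
    # At the reference A100-SXM4-40GB every spec ratio is 1, so the FoM
    # reduces to the sum of the version's weights: 1.6 + 1.6 + 0.8 = 4.0.
    assert FOM['ugr']('A100-SXM4-40GB') == pytest.approx(4.0)
```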
2 changes: 1 addition & 1 deletion .gitignore
@@ -159,4 +159,4 @@ cython_debug/
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
.idea/
10 changes: 5 additions & 5 deletions README.md
@@ -6,7 +6,7 @@ Supported FoMs:

*Examples*:

$ ./iguane.py -sr
$ python -m iguane -sr
2.59 H100-SXM5-80GB
2.43 H100-NVL-94GB
1.96 H100-PCIe-80GB
@@ -26,7 +26,7 @@ Supported FoMs:
0.25 P100-PCIe-16GB
0.22 P100-PCIe-12GB
0.21 T4
$ ./iguane.py -pd": "
$ python -m iguane -pd": "
P100-PCIe-12GB: 0.22040341999886826
P100-PCIe-16GB: 0.2539403974908297
P100-SXM2-16GB: 0.2702130531251874
@@ -46,7 +46,7 @@ Supported FoMs:
H100-PCIe-80GB: 1.9579954847095848
H100-SXM5-80GB: 2.586680957484816
H100-NVL-94GB: 2.4280983913222136
$ ./iguane.py -jsru rgu
$ python -m iguane -jsru rgu
{
"H100-SXM5-80GB": 12.168321513002363,
"H100-NVL-94GB": 11.091546985815604,
@@ -68,9 +68,9 @@ Supported FoMs:
"P100-PCIe-16GB": 1.182505910165485,
"P100-PCIe-12GB": 1.1025059101654848
}
$ ./iguane.py -i mila.json
$ python -m iguane -i mila.json
5886.69
$ ./iguane.py -sji mila.json
$ python -m iguane -sji mila.json
{
"breakdown": {
"A6000": 39.453049645390074,
Empty file added iguane/__init__.py
Empty file.
107 changes: 7 additions & 100 deletions iguane.py → iguane/__main__.py
@@ -1,10 +1,10 @@
#!/usr/bin/env python3
import argparse
import datetime
import glob, re
import json
import sys
import pathlib
from iguane.fom import UGR_VERSIONS, RAWDATA, FOM


def matchgpu(name, pat):
@@ -13,12 +13,6 @@ def matchgpu(name, pat):
if not re.match('[*?\\[\\]]', pat) and name.startswith(pat):
return True
return glob.fnmatch.fnmatchcase(name.lower(), pat.lower())
def fom(f):
globals().setdefault('FOM', {})
assert f.__name__.startswith('fom_') and f.__name__ != 'fom_', \
'Figure-of-Merit function names must start with "fom_"!'
FOM[f.__name__[4:]] = f
return f


#
@@ -193,93 +187,6 @@ def fom(f):
# (There is no agreement anywhere online on the exact NVL specs. This is just for fun.)
#

RAWDATA = { # TFLOPS TFLOPS TFLOPS TFLOPS GB GB/s W Release Date
'K80': dict(fp16= None, fp32= 4.368000, fp64= 1.456000, tf32= None, memgb= 12, membw= 240, tdp=150, reldate="2014-11-17"),
'M40': dict(fp16= None, fp32= 6.844416, fp64= 0.213888, tf32= None, memgb= 12, membw= 288, tdp=250, reldate="2015-11-10"),
'P100-PCIe-12GB': dict(fp16= 18.679808, fp32= 9.339904, fp64= 4.669952, tf32= None, memgb= 12, membw= 549, tdp=250, reldate="2016-06-20"),
'P100-PCIe-16GB': dict(fp16= 18.679808, fp32= 9.339904, fp64= 4.669952, tf32= None, memgb= 16, membw= 732, tdp=250, reldate="2016-06-20"),
'P100-SXM2-16GB': dict(fp16= 21.217280, fp32=10.608640, fp64= 5.304320, tf32= None, memgb= 16, membw= 732, tdp=300, reldate="2016-04-05"),
'V100-PCIe-16GB': dict(fp16=112.230400, fp32=14.028800, fp64= 7.014400, tf32= None, memgb= 16, membw= 900, tdp=250, reldate="2017-06-21"),
'V100-PCIe-32GB': dict(fp16=112.230400, fp32=14.028800, fp64= 7.014400, tf32= None, memgb= 32, membw= 900, tdp=250, reldate="2018-03-27"),
'V100-SXM2-16GB': dict(fp16=125.337600, fp32=15.667200, fp64= 7.833600, tf32= None, memgb= 16, membw= 900, tdp=300, reldate="2017-05-10"),
'V100-SXM2-32GB': dict(fp16=125.337600, fp32=15.667200, fp64= 7.833600, tf32= None, memgb= 32, membw= 900, tdp=300, reldate="2018-03-27"),
'V100S-PCIe-32GB': dict(fp16=130.826240, fp32=16.353280, fp64= 8.176640, tf32= None, memgb= 32, membw=1134, tdp=250, reldate="2019-11-26"),
'RTX-2080': dict(fp16= 80.547840, fp32=10.068480, fp64= 0.314640, tf32= None, memgb= 8, membw= 448, tdp=215, reldate="2018-09-20"),
'RTX-2080-Super': dict(fp16= 89.210880, fp32=11.151360, fp64= 0.348480, tf32= None, memgb= 8, membw= 496, tdp=250, reldate="2019-07-23"),
'RTX-2080-Ti': dict(fp16=107.581440, fp32=13.447680, fp64= 0.420240, tf32= None, memgb= 11, membw= 616, tdp=250, reldate="2018-09-27"),
'TITAN-RTX': dict(fp16=130.498560, fp32=16.312320, fp64= 0.509760, tf32= None, memgb= 24, membw= 672, tdp=280, reldate="2018-12-18"),
'T4': dict(fp16= 65.126400, fp32= 8.140800, fp64= 0.254400, tf32= None, memgb= 16, membw= 300, tdp= 70, reldate="2018-09-12"),
'RTX8000': dict(fp16=130.498560, fp32=16.312320, fp64= 0.509760, tf32= None, memgb= 48, membw= 672, tdp=260, reldate="2018-08-13"),
'A100-PCIe-40GB': dict(fp16=311.869440, fp32=19.491840, fp64= 9.745920, tf32=155.934720, memgb= 40, membw=1555, tdp=250, reldate="2020-05-14"),
'A100-PCIe-80GB': dict(fp16=311.869440, fp32=19.491840, fp64= 9.745920, tf32=155.934720, memgb= 80, membw=1555, tdp=250, reldate="2021-06-28"),
'A100-SXM4-40GB': dict(fp16=311.869440, fp32=19.491840, fp64= 9.745920, tf32=155.934720, memgb= 40, membw=1555, tdp=400, reldate="2020-05-14"),
'A100-SXM4-80GB': dict(fp16=311.869440, fp32=19.491840, fp64= 9.745920, tf32=155.934720, memgb= 80, membw=1555, tdp=400, reldate="2020-11-16"),
'A5000': dict(fp16=111.083520, fp32=27.770880, fp64= 0.433920, tf32= 55.541760, memgb= 24, membw= 768, tdp=230, reldate="2021-04-12"),
'A6000': dict(fp16=154.828800, fp32=38.707200, fp64= 0.604800, tf32= 77.414400, memgb= 48, membw= 768, tdp=300, reldate="2020-10-05"),
'RTX-4080': dict(fp16=194.949120, fp32=48.737280, fp64= 0.761520, tf32= 97.474560, memgb= 16, membw= 717, tdp=320, reldate="2022-11-16"),
'RTX-4080-Super': dict(fp16=208.896000, fp32=52.224000, fp64= 0.816000, tf32=104.448000, memgb= 16, membw= 736, tdp=425, reldate="2024-01-31"),
'RTX-4090-Laptop': dict(fp16=200.785920, fp32=50.196480, fp64= 0.784320, tf32=100.392960, memgb= 16, membw= 576, tdp=150, reldate="2023-02-08"),
'RTX-4090': dict(fp16=330.301440, fp32=82.575360, fp64= 1.290240, tf32=165.150720, memgb= 24, membw=1008, tdp=450, reldate="2022-10-12"),
'L40S': dict(fp16=366.428160, fp32=91.607040, fp64= 1.431360, tf32=183.214080, memgb= 48, membw= 864, tdp=350, reldate="2023-08-08"),
'H100-PCIe-80GB': dict(fp16=756.449280, fp32=51.217920, fp64=25.608960, tf32=378.224640, memgb= 80, membw=2039, tdp=350, reldate="2022-03-22"),
'H100-SXM5-80GB': dict(fp16=989.429760, fp32=66.908160, fp64=33.454080, tf32=494.714880, memgb= 80, membw=3352, tdp=700, reldate="2022-03-22"),
'H100-NVL-94GB': dict(fp16=835.500000, fp32=60.000000, fp64=30.000000, tf32=417.250000, memgb= 94, membw=3938, tdp=400, reldate="2023-03-21"),
}


UGR_VERSIONS = {
# v1.0 is equivalent to UGR
# https://docs.alliancecan.ca/wiki/Allocations_and_compute_scheduling
"1.0": { 'fp16': 1.6, 'fp32': 1.6, 'memgb': 0.8 },
"1.0-renorm": { 'fp16': 0.4, 'fp32': 0.4, 'memgb': 0.2 },
}


@fom
def fom_count(name, *, args=None):
return 1
@fom
def fom_fp16(name, *, args=None):
data = RAWDATA[name]
return data['fp16'] or data['fp32']
@fom
def fom_fp32(name, *, args=None):
return RAWDATA[name]['fp32']
@fom
def fom_fp64(name, *, args=None):
return RAWDATA[name]['fp64']
@fom
def fom_tf32(name, *, args=None):
data = RAWDATA[name]
return data['tf32'] or data['fp32']

@fom
def fom_ugr(name, *, args=None):
weights = UGR_VERSIONS[args.ugr_version if args else "1.0"]
ref = RAWDATA['A100-SXM4-40GB']
data = RAWDATA[name].copy()
data['tf32'] = data['tf32'] or data['fp32']
data['fp16'] = data['fp16'] or data['fp32']
return sum([w * (data[k] / ref[k]) for k, w in weights.items()])

@fom
def fom_iguane(name, *, args=None):
ref = RAWDATA['A100-SXM4-80GB']
data = RAWDATA[name].copy()
data['tf32'] = data['tf32'] or data['fp32']
data['fp16'] = data['fp16'] or data['fp32']
weight_fp16 = 0.2
weight_fp32 = 0.2
weight_tf32 = 0.2
weight_memgb = 0.2
weight_membw = 0.2
return weight_fp16 * (data['fp16'] / ref['fp16']) + \
weight_fp32 * (data['fp32'] / ref['fp32']) + \
weight_tf32 * (data['tf32'] / ref['tf32']) + \
weight_memgb * (data['memgb'] / ref['memgb']) + \
weight_membw * (data['membw'] / ref['membw'])



if __name__ == "__main__":
argp = argparse.ArgumentParser()
@@ -300,7 +207,7 @@ def fom_iguane(name, *, args=None):
help="Input JSON file")
argp.add_argument('--gpu', '-G', type=str, default=None,
help="Selected GPU")

ogrp = argp.add_argument_group('OUTPUT FORMAT', 'Output format control')
ogrp.add_argument('--delimiter', '-d', type=str, default=',',
help="Delimiter for --parsable output")
@@ -309,7 +216,7 @@ def fom_iguane(name, *, args=None):
help="Dump output as JSON")
omtx.add_argument('--parsable', '-p', action='store_true',
help="Dump output as delimited text")

ugrp = argp.add_argument_group('UNITS', 'Unit/FoM selection')
umtx = ugrp.add_mutually_exclusive_group()
umtx.add_argument('--unit', '-u', '--fom', type=str.lower, default='ugr',
@@ -326,12 +233,12 @@ def fom_iguane(name, *, args=None):
ugrp.add_argument('--ugr-version', type=str, default='1.0',
choices=UGR_VERSIONS.keys(),
help="Select UGR ponderation version")

# Parse Arguments
args = argp.parse_args(sys.argv[1:])
args.unit = {'iguana': 'iguane', 'rgu': 'ugr'}.get(args.unit, args.unit)


if args.list_units:
units = list(FOM.keys())
if args.reverse:
@@ -410,5 +317,5 @@ def fom_iguane(name, *, args=None):
print(f"{k}{args.delimiter}{DATA[k]}")
else:
print(f"{DATA[k]:-5.2f} {k}")

sys.exit(0)
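
With the script now packaged as a module, the FoM registry can also be used programmatically instead of only through the CLI. A minimal sketch, assuming the package is importable (the helper below is illustrative, not part of the commit):

```python
from iguane.fom import FOM, RAWDATA

def rank_gpus(unit='iguane'):
    """Score every GPU in RAWDATA with the chosen FoM, best first."""
    fom = FOM[unit]
    return sorted(((fom(name), name) for name in RAWDATA), reverse=True)

for score, name in rank_gpus()[:3]:
    print(f'{score:5.2f} {name}')
```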
84 changes: 84 additions & 0 deletions iguane/fom.py
@@ -0,0 +1,84 @@
import importlib.resources
import json
import re
try:
import tomllib
except ModuleNotFoundError:
# for Python before 3.11
import pip._vendor.tomli as tomllib


# Import RAWDATA (a dictionary mapping each GPU name to its specs: TFLOPS, memory, bandwidth, TDP, release date) from a resource file
RAWDATA = tomllib.loads(importlib.resources.read_text("iguane", "rawdata.toml"))


UGR_VERSIONS = {
# v1.0 is equivalent to UGR
# https://docs.alliancecan.ca/wiki/Allocations_and_compute_scheduling
"1.0": { 'fp16': 1.6, 'fp32': 1.6, 'memgb': 0.8 },
"1.0-renorm": { 'fp16': 0.4, 'fp32': 0.4, 'memgb': 0.2 },
}


def fom(f):
globals().setdefault('FOM', {})
assert f.__name__.startswith('fom_') and f.__name__ != 'fom_', \
'Figure-of-Merit function names must start with "fom_"!'
FOM[f.__name__[4:]] = f
return f



@fom
def fom_count(name, *, args=None):
return 1


@fom
def fom_fp16(name, *, args=None):
data = RAWDATA[name]
return data['fp16'] or data['fp32']


@fom
def fom_fp32(name, *, args=None):
return RAWDATA[name]['fp32']


@fom
def fom_fp64(name, *, args=None):
return RAWDATA[name]['fp64']


@fom
def fom_tf32(name, *, args=None):
data = RAWDATA[name]
return data['tf32'] or data['fp32']


@fom
def fom_ugr(name, *, args=None):
weights = UGR_VERSIONS[args.ugr_version if args else "1.0"]
ref = RAWDATA['A100-SXM4-40GB']
data = RAWDATA[name].copy()
data['tf32'] = data['tf32'] or data['fp32']
data['fp16'] = data['fp16'] or data['fp32']
return sum([w * (data[k] / ref[k]) for k, w in weights.items()])


@fom
def fom_iguane(name, *, args=None):
ref = RAWDATA['A100-SXM4-80GB']
data = RAWDATA[name].copy()
data['tf32'] = data['tf32'] or data['fp32']
data['fp16'] = data['fp16'] or data['fp32']
weight_fp16 = 0.2
weight_fp32 = 0.2
weight_tf32 = 0.2
weight_memgb = 0.2
weight_membw = 0.2
return weight_fp16 * (data['fp16'] / ref['fp16']) + \
weight_fp32 * (data['fp32'] / ref['fp32']) + \
weight_tf32 * (data['tf32'] / ref['tf32']) + \
weight_memgb * (data['memgb'] / ref['memgb']) + \
weight_membw * (data['membw'] / ref['membw'])
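
Because `fom` doubles as a registry, any function named `fom_<unit>` becomes selectable as unit `<unit>`, so downstream code can register extra figures of merit without touching this module. A hedged sketch (the FoM below is hypothetical, not part of this commit):

```python
from iguane.fom import FOM, RAWDATA, fom

@fom
def fom_fp16_per_watt(name, *, args=None):
    # Hypothetical FoM: fp16 TFLOPS (falling back to fp32) per watt of TDP.
    data = RAWDATA[name]
    return (data['fp16'] or data['fp32']) / data['tdp']

print(FOM['fp16_per_watt']('H100-SXM5-80GB'))  # 989.42976 / 700, about 1.41
```

Note that `fom_iguane` itself is just the equal-weights case: five ratios at 0.2 each, so the reference A100-SXM4-80GB scores exactly 1.0.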
31 changes: 31 additions & 0 deletions iguane/rawdata.toml
@@ -0,0 +1,31 @@
# TFLOPS TFLOPS TFLOPS TFLOPS GB GB/s W Release Date
K80 = {fp16= false, fp32= 4.368000, fp64= 1.456000, tf32= false, memgb= 12, membw= 240, tdp=150, reldate="2014-11-17"}
M40 = {fp16= false, fp32= 6.844416, fp64= 0.213888, tf32= false, memgb= 12, membw= 288, tdp=250, reldate="2015-11-10"}
P100-PCIe-12GB = {fp16= 18.679808, fp32= 9.339904, fp64= 4.669952, tf32= false, memgb= 12, membw= 549, tdp=250, reldate="2016-06-20"}
P100-PCIe-16GB = {fp16= 18.679808, fp32= 9.339904, fp64= 4.669952, tf32= false, memgb= 16, membw= 732, tdp=250, reldate="2016-06-20"}
P100-SXM2-16GB = {fp16= 21.217280, fp32=10.608640, fp64= 5.304320, tf32= false, memgb= 16, membw= 732, tdp=300, reldate="2016-04-05"}
V100-PCIe-16GB = {fp16=112.230400, fp32=14.028800, fp64= 7.014400, tf32= false, memgb= 16, membw= 900, tdp=250, reldate="2017-06-21"}
V100-PCIe-32GB = {fp16=112.230400, fp32=14.028800, fp64= 7.014400, tf32= false, memgb= 32, membw= 900, tdp=250, reldate="2018-03-27"}
V100-SXM2-16GB = {fp16=125.337600, fp32=15.667200, fp64= 7.833600, tf32= false, memgb= 16, membw= 900, tdp=300, reldate="2017-05-10"}
V100-SXM2-32GB = {fp16=125.337600, fp32=15.667200, fp64= 7.833600, tf32= false, memgb= 32, membw= 900, tdp=300, reldate="2018-03-27"}
V100S-PCIe-32GB = {fp16=130.826240, fp32=16.353280, fp64= 8.176640, tf32= false, memgb= 32, membw=1134, tdp=250, reldate="2019-11-26"}
RTX-2080 = {fp16= 80.547840, fp32=10.068480, fp64= 0.314640, tf32= false, memgb= 8, membw= 448, tdp=215, reldate="2018-09-20"}
RTX-2080-Super = {fp16= 89.210880, fp32=11.151360, fp64= 0.348480, tf32= false, memgb= 8, membw= 496, tdp=250, reldate="2019-07-23"}
RTX-2080-Ti = {fp16=107.581440, fp32=13.447680, fp64= 0.420240, tf32= false, memgb= 11, membw= 616, tdp=250, reldate="2018-09-27"}
TITAN-RTX = {fp16=130.498560, fp32=16.312320, fp64= 0.509760, tf32= false, memgb= 24, membw= 672, tdp=280, reldate="2018-12-18"}
T4 = {fp16= 65.126400, fp32= 8.140800, fp64= 0.254400, tf32= false, memgb= 16, membw= 300, tdp= 70, reldate="2018-09-12"}
RTX8000 = {fp16=130.498560, fp32=16.312320, fp64= 0.509760, tf32= false, memgb= 48, membw= 672, tdp=260, reldate="2018-08-13"}
A100-PCIe-40GB = {fp16=311.869440, fp32=19.491840, fp64= 9.745920, tf32=155.934720, memgb= 40, membw=1555, tdp=250, reldate="2020-05-14"}
A100-PCIe-80GB = {fp16=311.869440, fp32=19.491840, fp64= 9.745920, tf32=155.934720, memgb= 80, membw=1555, tdp=250, reldate="2021-06-28"}
A100-SXM4-40GB = {fp16=311.869440, fp32=19.491840, fp64= 9.745920, tf32=155.934720, memgb= 40, membw=1555, tdp=400, reldate="2020-05-14"}
A100-SXM4-80GB = {fp16=311.869440, fp32=19.491840, fp64= 9.745920, tf32=155.934720, memgb= 80, membw=1555, tdp=400, reldate="2020-11-16"}
A5000 = {fp16=111.083520, fp32=27.770880, fp64= 0.433920, tf32= 55.541760, memgb= 24, membw= 768, tdp=230, reldate="2021-04-12"}
A6000 = {fp16=154.828800, fp32=38.707200, fp64= 0.604800, tf32= 77.414400, memgb= 48, membw= 768, tdp=300, reldate="2020-10-05"}
RTX-4080 = {fp16=194.949120, fp32=48.737280, fp64= 0.761520, tf32= 97.474560, memgb= 16, membw= 717, tdp=320, reldate="2022-11-16"}
RTX-4080-Super = {fp16=208.896000, fp32=52.224000, fp64= 0.816000, tf32=104.448000, memgb= 16, membw= 736, tdp=425, reldate="2024-01-31"}
RTX-4090-Laptop = {fp16=200.785920, fp32=50.196480, fp64= 0.784320, tf32=100.392960, memgb= 16, membw= 576, tdp=150, reldate="2023-02-08"}
RTX-4090 = {fp16=330.301440, fp32=82.575360, fp64= 1.290240, tf32=165.150720, memgb= 24, membw=1008, tdp=450, reldate="2022-10-12"}
L40S = {fp16=366.428160, fp32=91.607040, fp64= 1.431360, tf32=183.214080, memgb= 48, membw= 864, tdp=350, reldate="2023-08-08"}
H100-PCIe-80GB = {fp16=756.449280, fp32=51.217920, fp64=25.608960, tf32=378.224640, memgb= 80, membw=2039, tdp=350, reldate="2022-03-22"}
H100-SXM5-80GB = {fp16=989.429760, fp32=66.908160, fp64=33.454080, tf32=494.714880, memgb= 80, membw=3352, tdp=700, reldate="2022-03-22"}
H100-NVL-94GB = {fp16=835.500000, fp32=60.000000, fp64=30.000000, tf32=417.250000, memgb= 94, membw=3938, tdp=400, reldate="2023-03-21"}
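
One subtlety of the TOML migration: TOML has no null value, so the `None` entries from the old Python dict are stored as `false`. The `data['fp16'] or data['fp32']` fallback in `fom.py` treats `False` and `None` alike, so the FoM functions are unaffected. The empty `iguane/__init__.py` added above makes the directory a regular package so the resource loader can find this file. A quick sketch of loading it (the commit itself uses `importlib.resources.read_text()`; `files()` shown here is an equivalent alternative, and on Pythons before 3.11 the `tomli` backport is the usual substitute for the `pip._vendor.tomli` fallback in `fom.py`):

```python
import importlib.resources
import tomllib  # Python 3.11+; on older versions use the tomli backport

RAWDATA = tomllib.loads(
    importlib.resources.files('iguane').joinpath('rawdata.toml').read_text()
)

k80 = RAWDATA['K80']
assert k80['fp16'] is False        # TOML has no null; false stands in for None
print(k80['fp16'] or k80['fp32'])  # falls back to fp32: 4.368
```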