From 80a038b4642022764561282ff3251b87efcd8664 Mon Sep 17 00:00:00 2001 From: Yunseon Shin Date: Mon, 1 Dec 2025 14:27:58 +0000 Subject: [PATCH 01/21] [refactor] NoC configuration --- TOGSim/configs/heterogeneous_c2_simple_noc.json | 2 +- TOGSim/configs/stonne_big_c1_simple_noc.json | 2 +- TOGSim/configs/stonne_single_c1_simple_noc.json | 2 +- TOGSim/configs/stonne_validation_c1_simple_noc.json | 2 +- TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv2.json | 2 +- TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json | 2 +- .../configs/systolic_ws_128x128_c1_simple_noc_tpuv3_half.json | 2 +- TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv4.json | 2 +- .../systolic_ws_128x128_c2_booksim_tpuv3_bw_quarter.json | 2 +- TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv2.json | 2 +- TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv3.json | 2 +- .../systolic_ws_128x128_c2_simple_noc_tpuv3_partition.json | 2 +- TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv4.json | 2 +- TOGSim/configs/systolic_ws_8x8_c1_12G_simple_noc.json | 2 +- TOGSim/configs/systolic_ws_8x8_c1_24G_simple_noc.json | 2 +- TOGSim/configs/systolic_ws_8x8_c1_48G_simple_noc.json | 2 +- TOGSim/configs/systolic_ws_8x8_c1_booksim.json | 2 +- TOGSim/configs/systolic_ws_8x8_c1_simple_noc.json | 2 +- TOGSim/configs/systolic_ws_8x8_c2_12G_simple_noc.json | 2 +- TOGSim/configs/systolic_ws_8x8_c2_24G_simple_noc.json | 2 +- TOGSim/configs/systolic_ws_8x8_c2_48G_simple_noc.json | 2 +- TOGSim/src/Common.cc | 4 ++-- 22 files changed, 23 insertions(+), 23 deletions(-) diff --git a/TOGSim/configs/heterogeneous_c2_simple_noc.json b/TOGSim/configs/heterogeneous_c2_simple_noc.json index 60f160a8..293fe385 100644 --- a/TOGSim/configs/heterogeneous_c2_simple_noc.json +++ b/TOGSim/configs/heterogeneous_c2_simple_noc.json @@ -17,7 +17,7 @@ "ramulator_config_path" : "../configs/ramulator2_configs/HBM2_TPUv3.yaml", "icnt_type" : "simple", - "icnt_latency" : 7, + "icnt_latency_cycles" : 10, "icnt_freq_mhz" : 940, "icnt_injection_ports_per_core" : 16, diff --git a/TOGSim/configs/stonne_big_c1_simple_noc.json b/TOGSim/configs/stonne_big_c1_simple_noc.json index 5d563fbe..0a8ca3c2 100644 --- a/TOGSim/configs/stonne_big_c1_simple_noc.json +++ b/TOGSim/configs/stonne_big_c1_simple_noc.json @@ -16,7 +16,7 @@ "ramulator_config_path" : "../configs/ramulator2_configs/HBM2_TPUv3.yaml", "icnt_type" : "simple", - "icnt_latency" : 7, + "icnt_latency_cycles" : 10, "icnt_freq_mhz" : 940, "icnt_injection_ports_per_core" : 16 } \ No newline at end of file diff --git a/TOGSim/configs/stonne_single_c1_simple_noc.json b/TOGSim/configs/stonne_single_c1_simple_noc.json index 304e84b3..3421d4f1 100644 --- a/TOGSim/configs/stonne_single_c1_simple_noc.json +++ b/TOGSim/configs/stonne_single_c1_simple_noc.json @@ -16,7 +16,7 @@ "ramulator_config_path" : "../configs/ramulator2_configs/HBM2.yaml", "icnt_type" : "simple", - "icnt_latency" : 7, + "icnt_latency_cycles" : 10, "icnt_freq_mhz" : 700, "icnt_injection_ports_per_core" : 8 } \ No newline at end of file diff --git a/TOGSim/configs/stonne_validation_c1_simple_noc.json b/TOGSim/configs/stonne_validation_c1_simple_noc.json index 38d4244c..fb196dfb 100644 --- a/TOGSim/configs/stonne_validation_c1_simple_noc.json +++ b/TOGSim/configs/stonne_validation_c1_simple_noc.json @@ -17,7 +17,7 @@ "l2d_config" : "S:128:128:64,32,L:T:m:W:L,A:192:4,32:0,32", "icnt_type" : "simple", - "icnt_latency" : 7, + "icnt_latency_cycles" : 10, "icnt_freq_mhz" : 1000, "icnt_injection_ports_per_core" : 8 } \ No newline at end of 
file diff --git a/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv2.json b/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv2.json index 1257891c..d2e5790e 100644 --- a/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv2.json +++ b/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv2.json @@ -12,7 +12,7 @@ "ramulator_config_path" : "../configs/ramulator2_configs/HBM2.yaml", "icnt_type" : "simple", - "icnt_latency" : 7, + "icnt_latency_cycles" : 10, "icnt_freq_mhz" : 700, "icnt_injection_ports_per_core" : 16 } \ No newline at end of file diff --git a/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json b/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json index b92d8029..828e44ca 100644 --- a/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json +++ b/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json @@ -13,7 +13,7 @@ "ramulator_config_path" : "../configs/ramulator2_configs/HBM2_TPUv3.yaml", "icnt_type" : "simple", - "icnt_latency" : 7, + "icnt_latency_cycles" : 10, "icnt_freq_mhz" : 940, "icnt_injection_ports_per_core" : 16 } \ No newline at end of file diff --git a/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3_half.json b/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3_half.json index 34896fc7..292967ac 100644 --- a/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3_half.json +++ b/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3_half.json @@ -13,7 +13,7 @@ "ramulator_config_path" : "../configs/ramulator2_configs/HBM2_TPUv3.yaml", "icnt_type" : "simple", - "icnt_latency" : 7, + "icnt_latency_cycles" : 10, "icnt_freq_mhz" : 940, "icnt_injection_ports_per_core" : 16 } \ No newline at end of file diff --git a/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv4.json b/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv4.json index 59be9fd4..01156589 100644 --- a/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv4.json +++ b/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv4.json @@ -15,7 +15,7 @@ "l2d_config" : "S:128:128:512,32,L:T:m:W:L,A:192:4,32:0,32", "icnt_type" : "simple", - "icnt_latency" : 7, + "icnt_latency_cycles" : 10, "icnt_freq_mhz" : 1050, "icnt_injection_ports_per_core" : 16 } \ No newline at end of file diff --git a/TOGSim/configs/systolic_ws_128x128_c2_booksim_tpuv3_bw_quarter.json b/TOGSim/configs/systolic_ws_128x128_c2_booksim_tpuv3_bw_quarter.json index 7382c4c8..7cc113e6 100644 --- a/TOGSim/configs/systolic_ws_128x128_c2_booksim_tpuv3_bw_quarter.json +++ b/TOGSim/configs/systolic_ws_128x128_c2_booksim_tpuv3_bw_quarter.json @@ -15,7 +15,7 @@ "dram_config_path" : "../configs/ramulator2_configs/HBM2_TPUv3.yaml", "icnt_type" : "booksim2", - "icnt_latency" : 1, + "icnt_latency_cycles" : 10, "icnt_freq" : 940, "icnt_injection_ports_per_core" : 16, "icnt_config_path" : "../configs/booksim2_configs/fly_c32_m8.icnt", diff --git a/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv2.json b/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv2.json index 2207f2b9..89847917 100644 --- a/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv2.json +++ b/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv2.json @@ -12,7 +12,7 @@ "ramulator_config_path" : "../configs/ramulator2_configs/HBM2.yaml", "icnt_type" : "simple", - "icnt_latency" : 7, + "icnt_latency_cycles" : 10, "icnt_freq_mhz" : 700, "icnt_injection_ports_per_core" : 16 } \ No newline at end of file diff --git a/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv3.json b/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv3.json index 
76f51b40..593c78f2 100644 --- a/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv3.json +++ b/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv3.json @@ -13,7 +13,7 @@ "ramulator_config_path" : "../configs/ramulator2_configs/HBM2_TPUv3.yaml", "icnt_type" : "simple", - "icnt_latency" : 7, + "icnt_latency_cycles" : 10, "icnt_freq_mhz" : 940, "icnt_injection_ports_per_core" : 16 } \ No newline at end of file diff --git a/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv3_partition.json b/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv3_partition.json index 42e003c7..bd6cb071 100644 --- a/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv3_partition.json +++ b/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv3_partition.json @@ -13,7 +13,7 @@ "ramulator_config_path" : "../configs/ramulator2_configs/HBM2_TPUv3.yaml", "icnt_type" : "simple", - "icnt_latency" : 7, + "icnt_latency_cycles" : 10, "icnt_freq_mhz" : 940, "icnt_injection_ports_per_core" : 16, diff --git a/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv4.json b/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv4.json index 44ec72fe..f8b0fb95 100644 --- a/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv4.json +++ b/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv4.json @@ -15,7 +15,7 @@ "l2d_config" : "S:64:128:512,32,L:B:m:W:L,A:192:4,32:0,32", "icnt_type" : "simple", - "icnt_latency" : 7, + "icnt_latency_cycles" : 10, "icnt_freq_mhz" : 1050, "icnt_injection_ports_per_core" : 16 } \ No newline at end of file diff --git a/TOGSim/configs/systolic_ws_8x8_c1_12G_simple_noc.json b/TOGSim/configs/systolic_ws_8x8_c1_12G_simple_noc.json index 045407b7..65236e3f 100644 --- a/TOGSim/configs/systolic_ws_8x8_c1_12G_simple_noc.json +++ b/TOGSim/configs/systolic_ws_8x8_c1_12G_simple_noc.json @@ -12,6 +12,6 @@ "ramulator_config_path" : "../configs/ramulator2_configs/DDR4.yaml", "icnt_type" : "simple", - "icnt_latency" : 1, + "icnt_latency_cycles" : 10, "icnt_freq_mhz" : 1000 } \ No newline at end of file diff --git a/TOGSim/configs/systolic_ws_8x8_c1_24G_simple_noc.json b/TOGSim/configs/systolic_ws_8x8_c1_24G_simple_noc.json index d8f95d70..a1fe4d12 100644 --- a/TOGSim/configs/systolic_ws_8x8_c1_24G_simple_noc.json +++ b/TOGSim/configs/systolic_ws_8x8_c1_24G_simple_noc.json @@ -12,6 +12,6 @@ "ramulator_config_path" : "../configs/ramulator2_configs/DDR4.yaml", "icnt_type" : "simple", - "icnt_latency" : 1, + "icnt_latency_cycles" : 10, "icnt_freq_mhz" : 1000 } \ No newline at end of file diff --git a/TOGSim/configs/systolic_ws_8x8_c1_48G_simple_noc.json b/TOGSim/configs/systolic_ws_8x8_c1_48G_simple_noc.json index a5fa9585..c1431f6d 100644 --- a/TOGSim/configs/systolic_ws_8x8_c1_48G_simple_noc.json +++ b/TOGSim/configs/systolic_ws_8x8_c1_48G_simple_noc.json @@ -12,6 +12,6 @@ "ramulator_config_path" : "../configs/ramulator2_configs/DDR4.yaml", "icnt_type" : "simple", - "icnt_latency" : 1, + "icnt_latency_cycles" : 10, "icnt_freq_mhz" : 1000 } \ No newline at end of file diff --git a/TOGSim/configs/systolic_ws_8x8_c1_booksim.json b/TOGSim/configs/systolic_ws_8x8_c1_booksim.json index cf560171..0f42812d 100644 --- a/TOGSim/configs/systolic_ws_8x8_c1_booksim.json +++ b/TOGSim/configs/systolic_ws_8x8_c1_booksim.json @@ -12,6 +12,6 @@ "ramulator_config_path" : "../configs/ramulator2_configs/DDR4.yaml", "icnt_type" : "booksim2", - "icnt_latency" : 1, + "icnt_latency_cycles" : 10, "icnt_freq_mhz" : 1000 } \ No newline at end of file diff --git a/TOGSim/configs/systolic_ws_8x8_c1_simple_noc.json 
b/TOGSim/configs/systolic_ws_8x8_c1_simple_noc.json index 8da61d72..5bb742bd 100644 --- a/TOGSim/configs/systolic_ws_8x8_c1_simple_noc.json +++ b/TOGSim/configs/systolic_ws_8x8_c1_simple_noc.json @@ -12,6 +12,6 @@ "ramulator_config_path" : "../configs/ramulator2_configs/DDR4.yaml", "icnt_type" : "simple", - "icnt_latency" : 1, + "icnt_latency_cycles" : 10, "icnt_freq_mhz" : 1000 } \ No newline at end of file diff --git a/TOGSim/configs/systolic_ws_8x8_c2_12G_simple_noc.json b/TOGSim/configs/systolic_ws_8x8_c2_12G_simple_noc.json index c5f429f9..33fb2e7f 100644 --- a/TOGSim/configs/systolic_ws_8x8_c2_12G_simple_noc.json +++ b/TOGSim/configs/systolic_ws_8x8_c2_12G_simple_noc.json @@ -13,6 +13,6 @@ "ramulator_config_path" : "../configs/ramulator2_configs/DDR4.yaml", "icnt_type" : "simple", - "icnt_latency" : 1, + "icnt_latency_cycles" : 10, "icnt_freq_mhz" : 1000 } \ No newline at end of file diff --git a/TOGSim/configs/systolic_ws_8x8_c2_24G_simple_noc.json b/TOGSim/configs/systolic_ws_8x8_c2_24G_simple_noc.json index 254520be..9c4cbb5c 100644 --- a/TOGSim/configs/systolic_ws_8x8_c2_24G_simple_noc.json +++ b/TOGSim/configs/systolic_ws_8x8_c2_24G_simple_noc.json @@ -12,6 +12,6 @@ "ramulator_config_path" : "../configs/ramulator2_configs/DDR4.yaml", "icnt_type" : "simple", - "icnt_latency" : 1, + "icnt_latency_cycles" : 10, "icnt_freq_mhz" : 1000 } \ No newline at end of file diff --git a/TOGSim/configs/systolic_ws_8x8_c2_48G_simple_noc.json b/TOGSim/configs/systolic_ws_8x8_c2_48G_simple_noc.json index e39867a7..143703aa 100644 --- a/TOGSim/configs/systolic_ws_8x8_c2_48G_simple_noc.json +++ b/TOGSim/configs/systolic_ws_8x8_c2_48G_simple_noc.json @@ -12,6 +12,6 @@ "ramulator_config_path" : "../configs/ramulator2_configs/DDR4.yaml", "icnt_type" : "simple", - "icnt_latency" : 1, + "icnt_latency_cycles" : 10, "icnt_freq_mhz" : 1000 } \ No newline at end of file diff --git a/TOGSim/src/Common.cc b/TOGSim/src/Common.cc index b5c092b3..336f2188 100644 --- a/TOGSim/src/Common.cc +++ b/TOGSim/src/Common.cc @@ -111,8 +111,8 @@ SimulationConfig initialize_config(json config) { throw std::runtime_error(fmt::format("Not implemented icnt type {} ", (std::string)config["icnt_type"])); parsed_config.icnt_freq_mhz = config["icnt_freq_mhz"]; - if (config.contains("icnt_latency")) - parsed_config.icnt_latency = config["icnt_latency"]; + if (config.contains("icnt_latency_cycles")) + parsed_config.icnt_latency = config["icnt_latency_cycles"]; if (config.contains("booksim_config_path")) parsed_config.icnt_config_path = config["booksim_config_path"]; if (config.contains("icnt_stats_print_period_cycles")) From eebdbcb1e25040b1fd379f885afc75c330816e24 Mon Sep 17 00:00:00 2001 From: Yunseon Shin Date: Mon, 1 Dec 2025 16:07:04 +0000 Subject: [PATCH 02/21] [refactor] backsim to togsim --- PyTorchSimFrontend/extension_codecache.py | 8 ++++---- PyTorchSimFrontend/extension_op.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/PyTorchSimFrontend/extension_codecache.py b/PyTorchSimFrontend/extension_codecache.py index 577c45e9..bddf2f0c 100644 --- a/PyTorchSimFrontend/extension_codecache.py +++ b/PyTorchSimFrontend/extension_codecache.py @@ -296,10 +296,10 @@ def dummy_simulator(*args, **kwargs): onnx_path = os.path.join(result_path, "tile_graph.onnx") attribute_path = os.path.join(runtime_path, "attribute") togsim_path = os.path.join(extension_config.CONFIG_TORCHSIM_DIR, "TOGSim") - backsim = TOGSimulator(togsim_path, extension_config.CONFIG_TOGSIM_CONFIG) - backsim.vectorlane_size = vectorlane_size -
attribute_path = backsim.create_attribute_file(attribute_path, args, loop_size=loop_size) - result_path = backsim.simulation(onnx_path, attribute_path, silent_mode=silent_mode) + TOGSim = TOGSimulator(togsim_path, extension_config.CONFIG_TOGSIM_CONFIG) + TOGSim.vectorlane_size = vectorlane_size + attribute_path = TOGSim.create_attribute_file(attribute_path, args, loop_size=loop_size) + result_path = TOGSim.simulation(onnx_path, attribute_path, silent_mode=silent_mode) result = TOGSimulator.get_result_from_file(result_path) return result diff --git a/PyTorchSimFrontend/extension_op.py b/PyTorchSimFrontend/extension_op.py index 167544f2..f0d0f20e 100644 --- a/PyTorchSimFrontend/extension_op.py +++ b/PyTorchSimFrontend/extension_op.py @@ -277,8 +277,8 @@ def sparse_mm_stonne_outer(a, b, out): togsim_path = os.path.join(extension_config.CONFIG_TORCHSIM_DIR, "TOGSim") stonne_config_path = f'{extension_config.CONFIG_TORCHSIM_DIR}/TOGSim/configs/stonne_single_c1_simple_noc.json' - backsim = TOGSimulator(togsim_path, stonne_config_path) - result_path = backsim.simulation(onnx_path) + TOGSim = TOGSimulator(togsim_path, stonne_config_path) + result_path = TOGSim.simulation(onnx_path) TOGSimulator.get_result_from_file(result_path) # Load result data From cef4e30b6ac70e135c4bb53c8cce06fdfd856770 Mon Sep 17 00:00:00 2001 From: Yunseon Shin Date: Mon, 1 Dec 2025 16:08:30 +0000 Subject: [PATCH 03/21] [refactor] reduce log & dump json --- PyTorchSimFrontend/extension_config.py | 34 +++++++++---------- .../mlir/mlir_codegen_backend.py | 9 +++-- PyTorchSimFrontend/mlir/mlir_template.py | 4 ++- Simulator/simulator.py | 3 +- TOGSim/src/Common.cc | 2 ++ 5 files changed, 30 insertions(+), 22 deletions(-) diff --git a/PyTorchSimFrontend/extension_config.py b/PyTorchSimFrontend/extension_config.py index 3d6fbb76..40c776a2 100644 --- a/PyTorchSimFrontend/extension_config.py +++ b/PyTorchSimFrontend/extension_config.py @@ -17,26 +17,26 @@ def __getattr__(name): "spad_size" : int(os.environ.get("TORCHSIM_SPAD_SIZE", default=128)) << 10 # Note: spad size per lane } if name == "CONFIG_PRECISION": - return 4 # 32bit + return 4 # 32bit if name == "CONFIG_NUM_CORES": - return 1 + return 1 if name == "CONFIG_VLEN": - return 256 # 256bits / 32bits = 8 [elements] + return 256 # 256bits / 32bits = 8 [elements] # Tile size config if name == "CONFIG_TORCHSIM_DIR": return os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim') if name == "CONFIG_TORCHSIM_DUMP_PATH": - return os.environ.get('TORCHSIM_DUMP_PATH', default = f"{tempfile.gettempdir()}/torchinductor") + return os.environ.get('TORCHSIM_DUMP_PATH', default = __getattr__("CONFIG_TORCHSIM_DIR")) if name == "CONFIG_TORCHSIM_DUMP_FILE": - return int(os.environ.get('TORCHSIM_DUMP_FILE', default=True)) + return int(os.environ.get('TORCHSIM_DUMP_FILE', default=True)) if name == "CONFIG_TORCHSIM_FUNCTIONAL_MODE": - return int(os.environ.get('TORCHSIM_FUNCTIONAL_MODE', default=True)) + return int(os.environ.get('TORCHSIM_FUNCTIONAL_MODE', default=True)) if name == "CONFIG_TORCHSIM_TIMING_MODE": - return int(os.environ.get("TORCHSIM_TIMING_MODE", True)) + return int(os.environ.get("TORCHSIM_TIMING_MODE", True)) if name == "CONFIG_CLEANUP_DUMP_ARGS": - return int(os.environ.get('CLEANUP_DUMP_ARGS', default=False)) + return int(os.environ.get('CLEANUP_DUMP_ARGS', default=False)) # LLVM PATH if name == "CONFIG_TORCHSIM_LLVM_PATH": @@ -91,8 +91,6 @@ def __getattr__(name): if name == "CONFIG_TILE_K": return int(os.getenv("TORCHSIM_TILE_K", __getattr__("CONFIG_VECTOR_LANE"))) 
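Note: the TORCHSIM_TILE_M/N/K getters above are plain environment variables, so a fixed GEMM tiling can be pinned from the launching script when manual tile sizing is enabled (TORCHSIM_MANUAL_TILE_SIZE at this point in the series; folded into TORCHSIM_MAPPING_POLICY=manual in PATCH 06/21). A minimal sketch with illustrative, untuned sizes:

    import os
    # Illustrative values only; set these before torch.compile() triggers codegen.
    os.environ["TORCHSIM_MANUAL_TILE_SIZE"] = "1"
    os.environ["TORCHSIM_TILE_M"] = "256"
    os.environ["TORCHSIM_TILE_N"] = "256"
    os.environ["TORCHSIM_TILE_K"] = "128"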
- if name == "CONFIG_SUBTILE": - return int(os.environ.get('TORCHSIM_SUBTILE', default=True)) if name == "CONFIG_MANUAL_SUBTILE_SIZE": return int(os.environ.get('TORCHSIM_MANUAL_SUBTILE_SIZE', default=False)) if name == "CONFIG_SUBTILE_M": @@ -107,20 +105,22 @@ def __getattr__(name): default=f"{__getattr__('CONFIG_TORCHSIM_DIR')}/validation/gemm_tpuv3_cheatsheet.json") # Compiler Optimization if name == "CONFIG_COMPILER_OPTIMIZATION": - return os.environ.get('TORCHSIM_COMPILER_OPTIMIZATION', default="all") # options: all, none, custom + return os.environ.get('TORCHSIM_COMPILER_OPTIMIZATION', default="all") # options: all, none, custom # Advanced fusion options if name == "CONFIG_FUSION": - return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "fusion" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False + return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "fusion" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False if name == "CONFIG_FUSION_REDUCTION_EPILOGUE": - return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "reduction_epliogue" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False + return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "reduction_epliogue" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False if name == "CONFIG_FUSION_REDUCTION_REDUCTION": - return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "reduction_reduction" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False + return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "reduction_reduction" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False if name == "CONFIG_FUSION_PROLOGUE": - return True if ((__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all") or ("prologue" in __getattr__("CONFIG_COMPILER_OPTIMIZATION"))) else False + return True if ((__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all") or ("prologue" in __getattr__("CONFIG_COMPILER_OPTIMIZATION"))) else False if name == "CONFIG_SINGLE_BATCH_CONV": - return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "single_batch_conv" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False + return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "single_batch_conv" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False if name == "CONFIG_MULTI_TILE_CONV": - return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "multi_tile_conv" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False + return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "multi_tile_conv" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False + if name == "CONFIG_SUBTILE": + return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "subtile" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False # SRAM Buffer allocation plan def load_plan_from_module(module_path): diff --git a/PyTorchSimFrontend/mlir/mlir_codegen_backend.py b/PyTorchSimFrontend/mlir/mlir_codegen_backend.py index c24260ce..001f9305 100644 --- a/PyTorchSimFrontend/mlir/mlir_codegen_backend.py +++ b/PyTorchSimFrontend/mlir/mlir_codegen_backend.py @@ -1572,7 +1572,8 @@ def make_choices(self, nodes, kernel_name): current_tile_sz = tuple(self.kernel_group.tile_desc.get_tile_size()) search_space.add(current_tile_sz) - print(f"[Auto-tune] Trying tile size: {list(current_tile_sz)}, vlane_stride: {self.kernel_group.tile_desc.vmap.vlane_stride}, split_axis: 
{self.kernel_group.tile_desc.vmap.vlane_split_axis}") self._prepare_simulator_headers(src_code) bench_runner = self.run_bench(nodes, kernel_name, src_code) choices.append((bench_runner, src_code, current_tile_sz, self.kernel_group.tile_desc.vmap.vlane_stride)) @@ -1614,7 +1615,8 @@ def make_choices(self, nodes, kernel_name): # Add this choice search_space.add(current_tile_sz) - print(f"[Auto-tune] Trying tile size: {list(current_tile_sz)}, vlane_stride: {self.kernel_group.tile_desc.vmap.vlane_stride}, split_axis: {self.kernel_group.tile_desc.vmap.vlane_split_axis}") + if extension_config.CONFIG_DEBUG_MODE: + print(f"[Auto-tune] Trying tile size: {list(current_tile_sz)}, vlane_stride: {self.kernel_group.tile_desc.vmap.vlane_stride}, split_axis: {self.kernel_group.tile_desc.vmap.vlane_split_axis}") self._prepare_simulator_headers(src_code) bench_runner = self.run_bench(nodes, kernel_name, src_code) choices.append((bench_runner, src_code, self.kernel_group.tile_desc.get_tile_size(), self.kernel_group.tile_desc.vmap.vlane_stride)) @@ -1641,7 +1643,8 @@ def get_cycle(choice): max_idx = results.index(min(results)) if min(results) == float("inf"): raise RuntimeError("Failed to find optimal tile size...") - self._log_autotune_result(choices[max_idx], results[max_idx]) + if extension_config.CONFIG_DEBUG_MODE: + self._log_autotune_result(choices[max_idx], results[max_idx]) optimal_src_code, loop_size = choices[max_idx][1], choices[max_idx][-1] return optimal_src_code, loop_size diff --git a/PyTorchSimFrontend/mlir/mlir_template.py b/PyTorchSimFrontend/mlir/mlir_template.py index df3621eb..b463ca6e 100644 --- a/PyTorchSimFrontend/mlir/mlir_template.py +++ b/PyTorchSimFrontend/mlir/mlir_template.py @@ -491,7 +491,9 @@ def codegen_template_code(self, render, template_node, prologue_nodes, epilogue_ def make_choices(self, tile_candidates, render, template_node, prologue_nodes, epilogue_nodes): choices = [] for tile_info in tile_candidates: - print(f"[Auto-tune] Trying tile size: {list(tile_info)}") + if extension_config.CONFIG_DEBUG_MODE: + # tile_info holds the compute tile (M, N, K) followed by the DMA tile (M, N, K) + print(f"[Auto-tune] Trying tile size: {list(tile_info)}") src_code = self.codegen_template_code(render, template_node, prologue_nodes, epilogue_nodes, tile_info) bench_runner = self.run_bench([template_node], self.kernel_name, src_code) choices.append((bench_runner, src_code, tile_info, self.loop_size)) diff --git a/Simulator/simulator.py b/Simulator/simulator.py index 0b241a50..e0fc1a64 100644 --- a/Simulator/simulator.py +++ b/Simulator/simulator.py @@ -248,7 +248,8 @@ def show_progress(): result_path = os.path.join(result_path, file_name) with open(result_path, "w") as f: f.write(result.decode()) - print(f'[TOGSim] Simulation of "{model_path}" is stored to "{result_path}"') + if not silent_mode or extension_config.CONFIG_DEBUG_MODE: + print(f'[TOGSim] Simulation of "{model_path}" is stored to "{result_path}"') return result_path def interactive_simulation(self): diff --git a/TOGSim/src/Common.cc b/TOGSim/src/Common.cc index 336f2188..9a6b7798 100644 --- a/TOGSim/src/Common.cc +++ b/TOGSim/src/Common.cc @@ -16,6 +16,8 @@ T get_config_value(json config, std::string key) { SimulationConfig initialize_config(json config) { SimulationConfig parsed_config; + // Dump the parsed JSON config so each run records its exact settings + spdlog::info("TOGSim
Config: {}", config.dump(2)); /* Core configs */ parsed_config.num_cores = config["num_cores"]; From 59b150cf506cc0df6af47954d9b4095f706a86dc Mon Sep 17 00:00:00 2001 From: Yunseon Shin Date: Mon, 1 Dec 2025 16:08:55 +0000 Subject: [PATCH 04/21] [Tutorial] jupyter update --- tutorial/session1/CompilerOptimization.ipynb | 79 ++++++++--- tutorial/session1/DNNServing.ipynb | 46 +++++-- tutorial/session1/ExecutionMode.ipynb | 134 +++++++++++++++---- tutorial/session1/HelloPyTorchSim.ipynb | 57 +++++--- tutorial/session1/LogAnalysis.ipynb | 52 +++++-- tutorial/session1/Mapping.ipynb | 126 ++++++++++++----- tutorial/session1/Training.ipynb | 4 +- 7 files changed, 383 insertions(+), 115 deletions(-) diff --git a/tutorial/session1/CompilerOptimization.ipynb b/tutorial/session1/CompilerOptimization.ipynb index 7c0cdb08..dec4a383 100644 --- a/tutorial/session1/CompilerOptimization.ipynb +++ b/tutorial/session1/CompilerOptimization.ipynb @@ -9,8 +9,16 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, + "execution_count": 1, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T16:02:57.028461Z", + "iopub.status.busy": "2025-12-01T16:02:57.027718Z", + "iopub.status.idle": "2025-12-01T16:02:58.396341Z", + "shell.execute_reply": "2025-12-01T16:02:58.394939Z", + "shell.execute_reply.started": "2025-12-01T16:02:57.028437Z" + } + }, "outputs": [], "source": [ "import torch\n", @@ -18,7 +26,6 @@ "import sys\n", "base_dir = os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim')\n", "sys.path.append(base_dir)\n", - "os.environ['TORCHSIM_DUMP_PATH']=base_dir\n", "os.environ['TORCHSIM_FUNCTIONAL_MODE']=\"0\"\n", "os.environ['TORCHSIM_TIMING_MODE']=\"1\"" ] @@ -32,8 +39,16 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": {}, + "execution_count": 2, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T16:02:58.403173Z", + "iopub.status.busy": "2025-12-01T16:02:58.403009Z", + "iopub.status.idle": "2025-12-01T16:03:17.426642Z", + "shell.execute_reply": "2025-12-01T16:03:17.425212Z", + "shell.execute_reply.started": "2025-12-01T16:02:58.403156Z" + } + }, "outputs": [ { "name": "stderr", @@ -52,7 +67,7 @@ "text": [ "ninja: no work to do.\n", "Wrapper Codegen Path = /tmp/torchinductor_root/3z/c3zx4dfsx2o24goyevxgy4upevdsyxegbadiquz7z33ttsbs22a6.py\n", - "[Gem5] Gem5 is running. \n", + "[Gem5] Gem5 is running... \n", "[TOGSim] TOGSim is running. 
\n", "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/5o2xythi5z3/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/5o2xythi5z3/togsim_result/0\"\n" ] @@ -74,14 +89,22 @@ }, { "cell_type": "code", - "execution_count": 4, - "metadata": {}, + "execution_count": 3, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T16:03:17.428533Z", + "iopub.status.busy": "2025-12-01T16:03:17.428158Z", + "iopub.status.idle": "2025-12-01T16:03:17.637165Z", + "shell.execute_reply": "2025-12-01T16:03:17.635970Z", + "shell.execute_reply.started": "2025-12-01T16:03:17.428513Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[2025-11-30 20:03:06.792] [info] Total execution cycle: 50316\n" + "[2025-12-01 16:03:17.222] [info] Total execution cycles: 51715\n" ] } ], @@ -98,19 +121,27 @@ }, { "cell_type": "code", - "execution_count": 5, - "metadata": {}, + "execution_count": 4, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T16:03:17.638828Z", + "iopub.status.busy": "2025-12-01T16:03:17.638591Z", + "iopub.status.idle": "2025-12-01T16:03:30.063552Z", + "shell.execute_reply": "2025-12-01T16:03:30.062137Z", + "shell.execute_reply.started": "2025-12-01T16:03:17.638808Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Wrapper Codegen Path = /tmp/torchinductor_root/6s/c6sm56korxlpospvj434xqahpah3qitivib3y66vxn6teip2uh32.py\n", + "Wrapper Codegen Path = /tmp/torchinductor_root/qi/cqi2yw3qrkfpe7v2zwm2hydottmvrhqqzqmmhaen7ozcvkkcrvro.py\n", "[Gem5] Gem5 is running... \n", "[Gem5] Gem5 is running.. \n", "[TOGSim] TOGSim is running. \n", - "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/togsim_result/4\"\n", - "[TOGSim] TOGSim is running.. \n", + "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/x2ueokr7kg3/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/x2ueokr7kg3/togsim_result/0\"\n", + "[TOGSim] TOGSim is running... 
\n", "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/37dfo4nczcq/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/37dfo4nczcq/togsim_result/0\"\n" ] } @@ -131,19 +162,27 @@ { "cell_type": "code", "execution_count": 6, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T16:03:51.827713Z", + "iopub.status.busy": "2025-12-01T16:03:51.827336Z", + "iopub.status.idle": "2025-12-01T16:03:52.225778Z", + "shell.execute_reply": "2025-12-01T16:03:52.224365Z", + "shell.execute_reply.started": "2025-12-01T16:03:51.827690Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[2025-11-30 20:03:30.690] [info] Total execution cycle: 46996\n", - "[2025-11-30 20:03:32.178] [info] Total execution cycle: 58455\n" + "[2025-12-01 16:03:27.953] [info] Total execution cycles: 49999\n", + "[2025-12-01 16:03:29.117] [info] Total execution cycles: 66093\n" ] } ], "source": [ - "!cat /root/workspace/PyTorchSim/tmp/4q4qv6gbpia/togsim_result/4 | grep \"Total execution cycle\"\n", + "!cat /root/workspace/PyTorchSim/tmp/x2ueokr7kg3/togsim_result/0 | grep \"Total execution cycle\"\n", "!cat /root/workspace/PyTorchSim/tmp/37dfo4nczcq/togsim_result/0 | grep \"Total execution cycle\"" ] }, @@ -157,7 +196,7 @@ ], "metadata": { "kernelspec": { - "display_name": "base", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -175,5 +214,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/tutorial/session1/DNNServing.ipynb b/tutorial/session1/DNNServing.ipynb index f6645b8f..3067822b 100644 --- a/tutorial/session1/DNNServing.ipynb +++ b/tutorial/session1/DNNServing.ipynb @@ -10,7 +10,15 @@ { "cell_type": "code", "execution_count": 1, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T16:01:10.802611Z", + "iopub.status.busy": "2025-12-01T16:01:10.802338Z", + "iopub.status.idle": "2025-12-01T16:01:12.056183Z", + "shell.execute_reply": "2025-12-01T16:01:12.055040Z", + "shell.execute_reply.started": "2025-12-01T16:01:10.802591Z" + } + }, "outputs": [], "source": [ "import torch\n", @@ -29,16 +37,36 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 2, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T16:01:12.057637Z", + "iopub.status.busy": "2025-12-01T16:01:12.057366Z", + "iopub.status.idle": "2025-12-01T16:01:13.474161Z", + "shell.execute_reply": "2025-12-01T16:01:13.472936Z", + "shell.execute_reply.started": "2025-12-01T16:01:12.057620Z" + } + }, + "outputs": [ + { + "ename": "TypeError", + "evalue": "Scheduler.__init__() got an unexpected keyword argument 'backend_config'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[2], line 6\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mScheduler\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mscheduler\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Scheduler, SchedulerDNNModel, Request\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mPyTorchSimFrontend\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mextension_config\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m CONFIG_TORCHSIM_BACKEND_CONFIG\n\u001b[0;32m----> 6\u001b[0m 
scheduler \u001b[38;5;241m=\u001b[39m \u001b[43mScheduler\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnum_request_queue\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mengine_select\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mScheduler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mFIFO_ENGINE\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbackend_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mCONFIG_TORCHSIM_BACKEND_CONFIG\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 7\u001b[0m device \u001b[38;5;241m=\u001b[39m scheduler\u001b[38;5;241m.\u001b[39mexecution_engine\u001b[38;5;241m.\u001b[39mmodule\u001b[38;5;241m.\u001b[39mcustom_device()\n\u001b[1;32m 9\u001b[0m model \u001b[38;5;241m=\u001b[39m resnet18()\u001b[38;5;241m.\u001b[39meval()\n", + "\u001b[0;31mTypeError\u001b[0m: Scheduler.__init__() got an unexpected keyword argument 'backend_config'" + ] + } + ], "source": [ "import torch\n", "from torchvision.models import resnet18\n", "from Scheduler.scheduler import Scheduler, SchedulerDNNModel, Request\n", - "from PyTorchSimFrontend.extension_config import CONFIG_TORCHSIM_BACKEND_CONFIG\n", + "from PyTorchSimFrontend import extension_config\n", "\n", - "scheduler = Scheduler(num_request_queue=1, engine_select=Scheduler.FIFO_ENGINE, backend_config=CONFIG_TORCHSIM_BACKEND_CONFIG)\n", + "scheduler = Scheduler(num_request_queue=1, engine_select=Scheduler.FIFO_ENGINE, togsim_config=extension_config.CONFIG_TOGSIM_CONFIG)\n", "device = scheduler.execution_engine.module.custom_device()\n", "\n", "model = resnet18().eval()\n", @@ -83,7 +111,7 @@ "target_model1 = resnet18().eval()\n", "\n", "# Init scheduler\n", - "scheduler = Scheduler(num_request_queue=1, max_batch=32, engine_select=Scheduler.FIFO_ENGINE, backend_config=f\"{CONFIG_TORCHSIM_DIR}/PyTorchSimBackend/configs/systolic_ws_128x128_c2_simple_noc_tpuv2.json\")\n", + "scheduler = Scheduler(num_request_queue=1, max_batch=32, engine_select=Scheduler.FIFO_ENGINE, backend_config=f\"{CONFIG_TORCHSIM_DIR}/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv3.json\")\n", "# Register compiled model\n", "opt_model1 = torch.compile(target_model1.to(device=scheduler.execution_engine.module.custom_device(), memory_format=torch.channels_last), dynamic=False)\n", "SchedulerDNNModel.register_model(\"resnet18\", opt_model1)\n", @@ -108,7 +136,7 @@ ], "metadata": { "kernelspec": { - "display_name": "base", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -126,5 +154,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/tutorial/session1/ExecutionMode.ipynb b/tutorial/session1/ExecutionMode.ipynb index 663fe5d6..bebf7951 100644 --- a/tutorial/session1/ExecutionMode.ipynb +++ b/tutorial/session1/ExecutionMode.ipynb @@ -10,7 +10,15 @@ { "cell_type": "code", "execution_count": 1, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T15:07:50.977314Z", + "iopub.status.busy": "2025-12-01T15:07:50.976976Z", + "iopub.status.idle": "2025-12-01T15:07:52.257401Z", + "shell.execute_reply": "2025-12-01T15:07:52.256027Z", + "shell.execute_reply.started": "2025-12-01T15:07:50.977284Z" + } + }, "outputs": [], "source": [ "import torch\n", @@ -30,7 +38,15 @@ { "cell_type": "code", "execution_count": 2, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T15:07:56.915608Z", + "iopub.status.busy": 
"2025-12-01T15:07:56.915105Z", + "iopub.status.idle": "2025-12-01T15:08:16.513755Z", + "shell.execute_reply": "2025-12-01T15:08:16.512040Z", + "shell.execute_reply.started": "2025-12-01T15:07:56.915587Z" + } + }, "outputs": [ { "name": "stderr", @@ -52,7 +68,7 @@ "[Gem5] Gem5 is running.. \n", "[Spike] Running Spike simulator\n", "[TOGSim] TOGSim is running. \n", - "[TOGSim] Simulation of \"/tmp/torchinductor/tmp/4q4qv6gbpia/tile_graph.onnx\" is stored to \"/tmp/torchinductor/tmp/4q4qv6gbpia/togsim_result/7\"\n" + "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/togsim_result/3\"\n" ] } ], @@ -80,7 +96,15 @@ { "cell_type": "code", "execution_count": 3, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T15:08:16.515946Z", + "iopub.status.busy": "2025-12-01T15:08:16.515491Z", + "iopub.status.idle": "2025-12-01T15:08:24.623667Z", + "shell.execute_reply": "2025-12-01T15:08:24.622036Z", + "shell.execute_reply.started": "2025-12-01T15:08:16.515923Z" + } + }, "outputs": [ { "name": "stdout", @@ -111,14 +135,22 @@ { "cell_type": "code", "execution_count": 4, - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T15:08:24.625148Z", + "iopub.status.busy": "2025-12-01T15:08:24.624921Z", + "iopub.status.idle": "2025-12-01T15:08:27.657175Z", + "shell.execute_reply": "2025-12-01T15:08:27.655628Z", + "shell.execute_reply.started": "2025-12-01T15:08:24.625129Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[TOGSim] TOGSim is running. \n", - "[TOGSim] Simulation of \"/tmp/torchinductor/tmp/4q4qv6gbpia/tile_graph.onnx\" is stored to \"/tmp/torchinductor/tmp/4q4qv6gbpia/togsim_result/8\"\n" + "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/togsim_result/4\"\n" ] } ], @@ -143,23 +175,33 @@ }, { "cell_type": "code", - "execution_count": 6, - "metadata": {}, + "execution_count": 5, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T15:08:27.658146Z", + "iopub.status.busy": "2025-12-01T15:08:27.657941Z", + "iopub.status.idle": "2025-12-01T15:08:59.079718Z", + "shell.execute_reply": "2025-12-01T15:08:59.078585Z", + "shell.execute_reply.started": "2025-12-01T15:08:27.658128Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ + "Wrapper Codegen Path = /tmp/torchinductor_root/m6/cm63zhmgb7n2askwt37lf72xuvbgpk6uvtmexreuxosqt3g5466s.py\n", + "[Gem5] Gem5 is running.. \n", "[TOGSim] TOGSim is running. 
\n", - "[TOGSim] Simulation of \"/tmp/torchinductor/tmp/4q4qv6gbpia/tile_graph.onnx\" is stored to \"/tmp/torchinductor/tmp/4q4qv6gbpia/togsim_result/9\"\n" + "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/hb6bvrfkqds/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/hb6bvrfkqds/togsim_result/0\"\n" ] } ], "source": [ "os.environ['TORCHSIM_CONFIG']=f\"{base_dir}/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json\"\n", "\n", - "input = torch.randn(1024, 1024).to(device=device)\n", - "weight = torch.randn(1024, 1024).to(device=device)\n", + "input = torch.randn(2048, 2048).to(device=device)\n", + "weight = torch.randn(2048, 2048).to(device=device)\n", "\n", "opt_fn = torch.compile(dynamic=False)(torch.matmul)\n", "npu_out = opt_fn(input, weight)" @@ -167,19 +209,27 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": {}, + "execution_count": 6, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T15:09:46.743678Z", + "iopub.status.busy": "2025-12-01T15:09:46.743216Z", + "iopub.status.idle": "2025-12-01T15:09:46.967813Z", + "shell.execute_reply": "2025-12-01T15:09:46.966078Z", + "shell.execute_reply.started": "2025-12-01T15:09:46.743653Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[2025-11-30 19:52:07.495] [info] Total execution cycle: 47171\n" + "[2025-12-01 15:08:58.162] [info] Total execution cycles: 299113\n" ] } ], "source": [ - "!cat /tmp/torchinductor/tmp/4q4qv6gbpia/togsim_result/9 | grep \"Total execution cycle\"" + "!cat /root/workspace/PyTorchSim/tmp/hb6bvrfkqds/togsim_result/0 | grep \"Total execution cycle\"" ] }, { @@ -191,23 +241,31 @@ }, { "cell_type": "code", - "execution_count": 9, - "metadata": {}, + "execution_count": 7, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T15:09:57.045456Z", + "iopub.status.busy": "2025-12-01T15:09:57.044956Z", + "iopub.status.idle": "2025-12-01T15:10:23.158900Z", + "shell.execute_reply": "2025-12-01T15:10:23.157486Z", + "shell.execute_reply.started": "2025-12-01T15:09:57.045420Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[TOGSim] TOGSim is running.. \n", - "[TOGSim] Simulation of \"/tmp/torchinductor/tmp/4q4qv6gbpia/tile_graph.onnx\" is stored to \"/tmp/torchinductor/tmp/4q4qv6gbpia/togsim_result/11\"\n" + "[TOGSim] TOGSim is running... 
\n", + "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/hb6bvrfkqds/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/hb6bvrfkqds/togsim_result/1\"\n" ] } ], "source": [ "os.environ['TORCHSIM_CONFIG']=f\"{base_dir}/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv3.json\"\n", "\n", - "input = torch.randn(1024, 1024).to(device=device)\n", - "weight = torch.randn(1024, 1024).to(device=device)\n", + "input = torch.randn(2048, 2048).to(device=device)\n", + "weight = torch.randn(2048, 2048).to(device=device)\n", "\n", "opt_fn = torch.compile(dynamic=False)(torch.matmul)\n", "npu_out = opt_fn(input, weight)" @@ -215,25 +273,47 @@ }, { "cell_type": "code", - "execution_count": 10, - "metadata": {}, + "execution_count": 8, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T15:10:25.410879Z", + "iopub.status.busy": "2025-12-01T15:10:25.410424Z", + "iopub.status.idle": "2025-12-01T15:10:25.626504Z", + "shell.execute_reply": "2025-12-01T15:10:25.625016Z", + "shell.execute_reply.started": "2025-12-01T15:10:25.410844Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[2025-11-30 19:53:32.005] [info] Total execution cycle: 40716\n" + "[2025-12-01 15:10:22.824] [info] Total execution cycles: 167394\n" ] } ], "source": [ - "!cat /tmp/torchinductor/tmp/4q4qv6gbpia/togsim_result/11 | grep \"Total execution cycle\"" + "!cat /root/workspace/PyTorchSim/tmp/hb6bvrfkqds/togsim_result/1 | grep \"Total execution cycle\"" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "base", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -251,5 +331,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/tutorial/session1/HelloPyTorchSim.ipynb b/tutorial/session1/HelloPyTorchSim.ipynb index 68430937..be7acafa 100644 --- a/tutorial/session1/HelloPyTorchSim.ipynb +++ b/tutorial/session1/HelloPyTorchSim.ipynb @@ -9,8 +9,16 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": {}, + "execution_count": 8, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T15:00:24.548912Z", + "iopub.status.busy": "2025-12-01T15:00:24.548584Z", + "iopub.status.idle": "2025-12-01T15:00:24.552810Z", + "shell.execute_reply": "2025-12-01T15:00:24.551884Z", + "shell.execute_reply.started": "2025-12-01T15:00:24.548889Z" + } + }, "outputs": [], "source": [ "import torch\n", @@ -30,8 +38,16 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, + "execution_count": 9, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T15:00:26.339225Z", + "iopub.status.busy": "2025-12-01T15:00:26.338820Z", + "iopub.status.idle": "2025-12-01T15:00:26.376456Z", + "shell.execute_reply": "2025-12-01T15:00:26.375714Z", + "shell.execute_reply.started": "2025-12-01T15:00:26.339187Z" + } + }, "outputs": [], "source": [ "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", @@ -53,17 +69,23 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": {}, + "execution_count": 11, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T15:00:55.016492Z", + "iopub.status.busy": "2025-12-01T15:00:55.016117Z", + "iopub.status.idle": "2025-12-01T15:00:56.241124Z", + "shell.execute_reply": "2025-12-01T15:00:56.239891Z", + 
"shell.execute_reply.started": "2025-12-01T15:00:55.016471Z" + } + }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", - "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/npu/build.ninja...\n", - "Building extension module npu...\n", - "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", + "No modifications detected for re-loaded extension module npu, skipping build step...\n", "Loading extension module npu...\n" ] }, @@ -71,12 +93,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "ninja: no work to do.\n", - "Wrapper Codegen Path = /tmp/torchinductor_root/ro/croutbd6yxrzgdstfcplx7yrpn2do5frwhyx2md5r7rvrubdhdgd.py\n", - "[Gem5] Gem5 is running... \n", "[Spike] Running Spike simulator\n", "[TOGSim] TOGSim is running.. \n", - "[TOGSim] Simulation of \"/tmp/torchinductor/tmp/fy6nnyudtno/tile_graph.onnx\" is stored to \"/tmp/torchinductor/tmp/fy6nnyudtno/togsim_result/0\"\n" + "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/fy6nnyudtno/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/fy6nnyudtno/togsim_result/3\"\n" ] } ], @@ -94,8 +113,16 @@ }, { "cell_type": "code", - "execution_count": 4, - "metadata": {}, + "execution_count": 7, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T14:23:02.977576Z", + "iopub.status.busy": "2025-12-01T14:23:02.977359Z", + "iopub.status.idle": "2025-12-01T14:23:02.982914Z", + "shell.execute_reply": "2025-12-01T14:23:02.981939Z", + "shell.execute_reply.started": "2025-12-01T14:23:02.977557Z" + } + }, "outputs": [], "source": [ "def test_result(name, npu_out, cpu_out, rtol=1e-4, atol=1e-4):\n", diff --git a/tutorial/session1/LogAnalysis.ipynb b/tutorial/session1/LogAnalysis.ipynb index 8dd7ad0d..70b2418c 100644 --- a/tutorial/session1/LogAnalysis.ipynb +++ b/tutorial/session1/LogAnalysis.ipynb @@ -9,8 +9,16 @@ }, { "cell_type": "code", - "execution_count": 4, - "metadata": {}, + "execution_count": 1, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T15:02:31.262456Z", + "iopub.status.busy": "2025-12-01T15:02:31.262104Z", + "iopub.status.idle": "2025-12-01T15:02:32.582082Z", + "shell.execute_reply": "2025-12-01T15:02:32.580928Z", + "shell.execute_reply.started": "2025-12-01T15:02:31.262435Z" + } + }, "outputs": [], "source": [ "import torch\n", @@ -18,7 +26,6 @@ "import sys\n", "base_dir = os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim')\n", "sys.path.append(base_dir)\n", - "os.environ['TORCHSIM_DUMP_PATH']=base_dir\n", "os.environ['TORCHSIM_FUNCTIONAL_MODE']=\"0\"\n", "os.environ['TORCHSIM_TIMING_MODE']=\"1\"" ] @@ -32,15 +39,25 @@ }, { "cell_type": "code", - "execution_count": 5, - "metadata": {}, + "execution_count": 2, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T15:02:32.583264Z", + "iopub.status.busy": "2025-12-01T15:02:32.582996Z", + "iopub.status.idle": "2025-12-01T15:02:44.433624Z", + "shell.execute_reply": "2025-12-01T15:02:44.432023Z", + "shell.execute_reply.started": "2025-12-01T15:02:32.583247Z" + } + }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", - "No modifications detected for re-loaded extension module npu, skipping build step...\n", + "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/npu/build.ninja...\n", + "Building extension 
module npu...\n", + "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", "Loading extension module npu...\n" ] }, @@ -48,8 +65,11 @@ "name": "stdout", "output_type": "stream", "text": [ + "ninja: no work to do.\n", + "Wrapper Codegen Path = /tmp/torchinductor_root/yr/cyrl4zqohiglmrez32dmaijhd3sfdh4xabea5splhxwtwckiykpr.py\n", + "[Gem5] Gem5 is running.. \n", "[TOGSim] TOGSim is running. \n", - "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/togsim_result/2\"\n" + "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/togsim_result/0\"\n" ] } ], @@ -73,15 +93,23 @@ }, { "cell_type": "code", - "execution_count": 6, - "metadata": {}, + "execution_count": 3, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T15:02:44.434717Z", + "iopub.status.busy": "2025-12-01T15:02:44.434416Z", + "iopub.status.idle": "2025-12-01T15:02:47.466577Z", + "shell.execute_reply": "2025-12-01T15:02:47.464858Z", + "shell.execute_reply.started": "2025-12-01T15:02:44.434698Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[TOGSim] TOGSim is running. \n", - "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/togsim_result/3\"\n" + "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/togsim_result/1\"\n" ] } ], @@ -105,7 +133,7 @@ ], "metadata": { "kernelspec": { - "display_name": "base", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -123,5 +151,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/tutorial/session1/Mapping.ipynb b/tutorial/session1/Mapping.ipynb index c38601f8..15b1a1c1 100644 --- a/tutorial/session1/Mapping.ipynb +++ b/tutorial/session1/Mapping.ipynb @@ -9,8 +9,16 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, + "execution_count": 1, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T15:47:11.380236Z", + "iopub.status.busy": "2025-12-01T15:47:11.380048Z", + "iopub.status.idle": "2025-12-01T15:47:12.604801Z", + "shell.execute_reply": "2025-12-01T15:47:12.604050Z", + "shell.execute_reply.started": "2025-12-01T15:47:11.380218Z" + } + }, "outputs": [], "source": [ "import torch\n", @@ -29,8 +37,16 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": {}, + "execution_count": 2, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T15:47:12.605523Z", + "iopub.status.busy": "2025-12-01T15:47:12.605335Z", + "iopub.status.idle": "2025-12-01T15:47:31.694085Z", + "shell.execute_reply": "2025-12-01T15:47:31.693144Z", + "shell.execute_reply.started": "2025-12-01T15:47:12.605510Z" + } + }, "outputs": [ { "name": "stderr", @@ -52,7 +68,7 @@ "[Gem5] Gem5 is running.. \n", "[Spike] Running Spike simulator\n", "[TOGSim] TOGSim is running. 
\n", - "[TOGSim] Simulation of \"/tmp/torchinductor/tmp/4q4qv6gbpia/tile_graph.onnx\" is stored to \"/tmp/torchinductor/tmp/4q4qv6gbpia/togsim_result/6\"\n" + "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/togsim_result/2\"\n" ] } ], @@ -69,19 +85,27 @@ }, { "cell_type": "code", - "execution_count": 4, - "metadata": {}, + "execution_count": 3, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T15:47:31.695314Z", + "iopub.status.busy": "2025-12-01T15:47:31.695049Z", + "iopub.status.idle": "2025-12-01T15:47:31.893585Z", + "shell.execute_reply": "2025-12-01T15:47:31.892528Z", + "shell.execute_reply.started": "2025-12-01T15:47:31.695296Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[2025-11-30 18:53:14.002] [info] Total execution cycle: 47158\n" + "[2025-12-01 15:44:22.862] [info] Total execution cycles: 48536\n" ] } ], "source": [ - "!cat /tmp/torchinductor/tmp/4q4qv6gbpia/togsim_result/0 | grep \"Total execution cycle\"" + "!cat /root/workspace/PyTorchSim/tmp/4q4qv6gbpia/togsim_result/0 | grep \"Total execution cycle\"" ] }, { @@ -94,8 +118,16 @@ }, { "cell_type": "code", - "execution_count": 5, - "metadata": {}, + "execution_count": 4, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T15:47:31.894500Z", + "iopub.status.busy": "2025-12-01T15:47:31.894332Z", + "iopub.status.idle": "2025-12-01T15:47:54.385682Z", + "shell.execute_reply": "2025-12-01T15:47:54.384358Z", + "shell.execute_reply.started": "2025-12-01T15:47:31.894485Z" + } + }, "outputs": [ { "name": "stdout", @@ -105,7 +137,7 @@ "[Gem5] Gem5 is running. \n", "[Spike] Running Spike simulator\n", "[TOGSim] TOGSim is running... 
\n", - "[TOGSim] Simulation of \"/tmp/torchinductor/tmp/75hiq5mugpq/tile_graph.onnx\" is stored to \"/tmp/torchinductor/tmp/75hiq5mugpq/togsim_result/1\"\n" + "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/75hiq5mugpq/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/75hiq5mugpq/togsim_result/1\"\n" ] } ], @@ -126,19 +158,27 @@ }, { "cell_type": "code", - "execution_count": 6, - "metadata": {}, + "execution_count": 5, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T15:47:54.386633Z", + "iopub.status.busy": "2025-12-01T15:47:54.386431Z", + "iopub.status.idle": "2025-12-01T15:47:54.585477Z", + "shell.execute_reply": "2025-12-01T15:47:54.584126Z", + "shell.execute_reply.started": "2025-12-01T15:47:54.386615Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[2025-11-30 18:54:00.878] [info] Total execution cycle: 53704\n" + "[2025-12-01 15:44:47.602] [info] Total execution cycles: 53419\n" ] } ], "source": [ - "!cat /tmp/torchinductor/tmp/75hiq5mugpq/togsim_result/0 | grep \"Total execution cycle\"" + "!cat /root/workspace/PyTorchSim/tmp/75hiq5mugpq/togsim_result/0 | grep \"Total execution cycle\"" ] }, { @@ -150,23 +190,26 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": {}, + "execution_count": 6, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T15:47:54.587092Z", + "iopub.status.busy": "2025-12-01T15:47:54.586748Z", + "iopub.status.idle": "2025-12-01T15:48:12.586081Z", + "shell.execute_reply": "2025-12-01T15:48:12.584431Z", + "shell.execute_reply.started": "2025-12-01T15:47:54.587070Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[Auto-tune] Trying tile size: [1024, 1024, 256, 128, 1024, 256]\n", - "[Auto-tune] Trying tile size: [256, 1024, 1024, 128, 1024, 1024]\n", - "[Auto-tune] Trying tile size: [1024, 256, 1024, 128, 256, 1024]\n", - "[Auto-tune] Trying tile size: [1024, 1024, 128, 128, 1024, 128]\n", - "[Auto-tune] Optimal tile size: [1024, 1024, 128, 128, 1024, 128], cycles: 46423\n", - "Wrapper Codegen Path = /tmp/torchinductor_root/xe/cxeovwxl3i2e2mljoiqq35wn6uzsksisgoqqx7pzrbwl3lj6um34.py\n", + "Wrapper Codegen Path = /tmp/torchinductor_root/gg/cgg5mmgs4wa3u5od6mgxuausnbthyzfsnnauqtszfuq7qvl2qrj2.py\n", "[Gem5] Gem5 is running.. \n", "[Spike] Running Spike simulator\n", "[TOGSim] TOGSim is running. 
\n", - "[TOGSim] Simulation of \"/tmp/torchinductor/tmp/x27ipc5avjg/tile_graph.onnx\" is stored to \"/tmp/torchinductor/tmp/x27ipc5avjg/togsim_result/3\"\n" + "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/togsim_result/3\"\n" ] } ], @@ -182,19 +225,42 @@ "npu_out = opt_fn(input, weight)" ] }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "execution": { + "iopub.execute_input": "2025-12-01T15:48:12.587335Z", + "iopub.status.busy": "2025-12-01T15:48:12.586969Z", + "iopub.status.idle": "2025-12-01T15:48:12.795718Z", + "shell.execute_reply": "2025-12-01T15:48:12.794347Z", + "shell.execute_reply.started": "2025-12-01T15:48:12.587305Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2025-12-01 15:45:20.498] [info] Total execution cycles: 48564\n" + ] + } + ], + "source": [ + "!cat /root/workspace/PyTorchSim/tmp/4q4qv6gbpia/togsim_result/1 | grep \"Total execution cycle\"" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "!cat /tmp/torchinductor/tmp/x27ipc5avjg/togsim_result/1 | grep \"Total execution cycle\"" - ] + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "base", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -212,5 +278,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/tutorial/session1/Training.ipynb b/tutorial/session1/Training.ipynb index 2deff8b4..9c39cadd 100644 --- a/tutorial/session1/Training.ipynb +++ b/tutorial/session1/Training.ipynb @@ -176,7 +176,7 @@ ], "metadata": { "kernelspec": { - "display_name": "base", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -194,5 +194,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } From 4709a5b4be23378fa884831772bf41dd82367847 Mon Sep 17 00:00:00 2001 From: Yunseon Shin Date: Tue, 2 Dec 2025 08:53:29 +0000 Subject: [PATCH 05/21] [refactor] dump log modified --- PyTorchSimFrontend/extension_config.py | 5 ++++- Scheduler/scheduler.py | 2 +- Simulator/simulator.py | 7 ++++--- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/PyTorchSimFrontend/extension_config.py b/PyTorchSimFrontend/extension_config.py index 40c776a2..4f950893 100644 --- a/PyTorchSimFrontend/extension_config.py +++ b/PyTorchSimFrontend/extension_config.py @@ -2,6 +2,7 @@ import sys import tempfile import importlib +import datetime def __getattr__(name): @@ -28,7 +29,9 @@ def __getattr__(name): return os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim') if name == "CONFIG_TORCHSIM_DUMP_PATH": - return os.environ.get('TORCHSIM_DUMP_PATH', default = __getattr__("CONFIG_TORCHSIM_DIR")) + return os.environ.get('TORCHSIM_DUMP_PATH', default = f"{tempfile.gettempdir()}/torchinductor") + if name == "CONFIG_TORCHSIM_LOG_PATH": + return os.environ.get('TORCHSIM_DUMP_LOG_PATH', default = os.path.join(__getattr__("CONFIG_TORCHSIM_DIR"), "outputs", datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))) if name == "CONFIG_TORCHSIM_DUMP_FILE": return int(os.environ.get('TORCHSIM_DUMP_FILE', default=True)) if name == "CONFIG_TORCHSIM_FUNCTIONAL_MODE": diff --git a/Scheduler/scheduler.py b/Scheduler/scheduler.py index 0b633fa9..3bf28016 100644 --- a/Scheduler/scheduler.py +++ b/Scheduler/scheduler.py @@ -222,7 +222,7 @@ def is_all_idle(self): return all([self.is_partition_idle(i) for i in 
range(self.num_partion)]) def prepare_model(self, req_model: SchedulerDNNModel): - result_path = os.path.join(extension_config.CONFIG_TORCHSIM_DUMP_PATH, "togsim_result", req_model.model_name) + result_path = os.path.join(extension_config.CONFIG_TORCHSIM_LOG_PATH, "togsim_result", req_model.model_name) os.makedirs(result_path, exist_ok=True) index = str(len(os.listdir(result_path))) diff --git a/Simulator/simulator.py b/Simulator/simulator.py index e0fc1a64..b16ee4cf 100644 --- a/Simulator/simulator.py +++ b/Simulator/simulator.py @@ -242,14 +242,15 @@ def show_progress(): print("[TOGSim] Error output:", e.output) assert 0 # Save result to result_path - result_path = os.path.join(os.path.dirname(model_path), "togsim_result") + result_path = extension_config.CONFIG_TORCHSIM_LOG_PATH os.makedirs(result_path, exist_ok=True) - file_name = str(len(os.listdir(result_path))) + file_name = "togsim_result.log" result_path = os.path.join(result_path, file_name) with open(result_path, "w") as f: f.write(result.decode()) if not silent_mode or extension_config.CONFIG_DEBUG_MODE: - print(f'[TOGSim] Simulation of "{model_path}" is stored to "{result_path}"') + model_path_log = f' of "{model_path}" ' if extension_config.CONFIG_DEBUG_MODE else " " + print(f'[TOGSim] Simulation log{model_path_log}is stored to "{result_path}"') return result_path def interactive_simulation(self): From 51d702890c6b86c02b122b9123f717029926191b Mon Sep 17 00:00:00 2001 From: Yunseon Shin Date: Tue, 2 Dec 2025 14:26:05 +0000 Subject: [PATCH 06/21] [refactor] mapping policy config --- PyTorchSimFrontend/extension_config.py | 28 ++++++++----------- .../mlir/mlir_codegen_backend.py | 2 +- PyTorchSimFrontend/mlir/mlir_gemm_template.py | 2 +- PyTorchSimFrontend/mlir/mlir_template.py | 2 +- 4 files changed, 15 insertions(+), 19 deletions(-) diff --git a/PyTorchSimFrontend/extension_config.py b/PyTorchSimFrontend/extension_config.py index 4f950893..9b9381b2 100644 --- a/PyTorchSimFrontend/extension_config.py +++ b/PyTorchSimFrontend/extension_config.py @@ -29,11 +29,9 @@ def __getattr__(name): return os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim') if name == "CONFIG_TORCHSIM_DUMP_PATH": - return os.environ.get('TORCHSIM_DUMP_PATH', default = f"{tempfile.gettempdir()}/torchinductor") + return os.environ.get('TORCHSIM_DUMP_PATH', default = __getattr__('CONFIG_TORCHSIM_DIR')) if name == "CONFIG_TORCHSIM_LOG_PATH": return os.environ.get('TORCHSIM_DUMP_LOG_PATH', default = os.path.join(__getattr__("CONFIG_TORCHSIM_DIR"), "outputs", datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))) - if name == "CONFIG_TORCHSIM_DUMP_FILE": - return int(os.environ.get('TORCHSIM_DUMP_FILE', default=True)) if name == "CONFIG_TORCHSIM_FUNCTIONAL_MODE": return int(os.environ.get('TORCHSIM_FUNCTIONAL_MODE', default=True)) if name == "CONFIG_TORCHSIM_TIMING_MODE": @@ -70,23 +68,15 @@ def __getattr__(name): return os.environ.get('GEM5_SCRIPT_PATH', default=f"{__getattr__('CONFIG_TORCHSIM_DIR')}/gem5_script/script_systolic.py") - # AUTOTUNE config - if name == "CONFIG_AUTOTUNE": - return int(os.environ.get('AUTOTUNE', default=False)) - if name == "CONFIG_AUTOTUNE_TEMPLATE": - return int(os.environ.get('AUTOTUNE_TEMPLATE', default=False)) - if name == "CONFIG_MAX_AUTOTUNE_TRY": - return int(os.environ.get('MAX_AUTOTUNE_TRY', default=10)) - if name == "CONFIG_AUTOTUNE_TEMPLATE_TOPK": - return int(os.environ.get('AUTOTUNE_TEMPLATE_TOPK', default=4)) - # For block sparse if name == "CONFIG_BLOCK_SPARSE": return int(os.environ.get('BLOCK_SPARSE', 
default=0)) - # For GEMM tile size - if name == "CONFIG_MANUAL_TILE_SIZE": - return int(os.environ.get('TORCHSIM_MANUAL_TILE_SIZE', default=False)) + # Mapping Policy + if name == "CONFIG_MAPPING_POLICY": + return os.environ.get('TORCHSIM_MAPPING_POLICY', default="heuristic") # heuristic, manual, autotune + + # Manual Tile Size if name == "CONFIG_TILE_M": return int(os.getenv("TORCHSIM_TILE_M", __getattr__("CONFIG_VECTOR_LANE"))) if name == "CONFIG_TILE_N": @@ -103,6 +93,12 @@ def __getattr__(name): if name == "CONFIG_SUBTILE_K": return int(os.environ.get('TORCHSIM_SUBTILE_K', default=__getattr__("CONFIG_VECTOR_LANE"))) + # Autotune config + if name == "CONFIG_MAX_AUTOTUNE_TRY": + return int(os.environ.get('MAX_AUTOTUNE_TRY', default=10)) + if name == "CONFIG_AUTOTUNE_TEMPLATE_TOPK": + return int(os.environ.get('AUTOTUNE_TEMPLATE_TOPK', default=4)) + if name == "CONFIG_GEMM_CHEATSHEET_PATH": return os.environ.get('TORCHSIM_GEMM_CHEATSHEET_PATH', default=f"{__getattr__('CONFIG_TORCHSIM_DIR')}/validation/gemm_tpuv3_cheatsheet.json") diff --git a/PyTorchSimFrontend/mlir/mlir_codegen_backend.py b/PyTorchSimFrontend/mlir/mlir_codegen_backend.py index 001f9305..2732c25b 100644 --- a/PyTorchSimFrontend/mlir/mlir_codegen_backend.py +++ b/PyTorchSimFrontend/mlir/mlir_codegen_backend.py @@ -1683,7 +1683,7 @@ def _log_autotune_result(self, best_choice, best_cycle): def codegen_nodes(self, nodes, kernel_name): src_code = super().codegen_nodes(nodes, kernel_name) self._prepare_simulator_headers(src_code) - if extension_config.CONFIG_AUTOTUNE and extension_config.CONFIG_TORCHSIM_TIMING_MODE: + if extension_config.CONFIG_MAPPING_POLICY == "autotune" and extension_config.CONFIG_TORCHSIM_TIMING_MODE: optimal_src_code = self.autotune(nodes, kernel_name)[0] if optimal_src_code is not None: return optimal_src_code diff --git a/PyTorchSimFrontend/mlir/mlir_gemm_template.py b/PyTorchSimFrontend/mlir/mlir_gemm_template.py index 6271b548..6b504b38 100644 --- a/PyTorchSimFrontend/mlir/mlir_gemm_template.py +++ b/PyTorchSimFrontend/mlir/mlir_gemm_template.py @@ -307,7 +307,7 @@ def select_tile(self, kernel, M, N, K, n_extra_node, n_extra_read, n_prologue_no data = json.load(f) gemm_shape = f"{M}_{K}_{N}" - if extension_config.CONFIG_MANUAL_TILE_SIZE: + if extension_config.CONFIG_MAPPING_POLICY == "manual": # case 1: use manual tile size TILE_M = extension_config.CONFIG_TILE_M TILE_N = extension_config.CONFIG_TILE_N diff --git a/PyTorchSimFrontend/mlir/mlir_template.py b/PyTorchSimFrontend/mlir/mlir_template.py index b463ca6e..50f61a59 100644 --- a/PyTorchSimFrontend/mlir/mlir_template.py +++ b/PyTorchSimFrontend/mlir/mlir_template.py @@ -508,7 +508,7 @@ def _log_autotune_result(self, best_choice, best_cycle): ) def codegen_nodes(self, tile_candidates, render, template_node, prologue_nodes, epilogue_nodes): - if extension_config.CONFIG_AUTOTUNE_TEMPLATE and len(tile_candidates): + if extension_config.CONFIG_MAPPING_POLICY == "autotune" and len(tile_candidates): src_code, loop_size = self.autotune(tile_candidates, render, template_node, prologue_nodes, epilogue_nodes) self.loop_size = loop_size else: From 230dfcb421e71e7652e59e380f3cc51be2305efa Mon Sep 17 00:00:00 2001 From: Wonhyuk Yang Date: Tue, 2 Dec 2025 15:39:04 +0000 Subject: [PATCH 07/21] [Refactor] Remove deprecated env var --- PyTorchSimFrontend/extension_codecache.py | 13 ------------- PyTorchSimFrontend/extension_config.py | 11 +---------- README.md | 2 -- Simulator/simulator.py | 3 ++- 4 files changed, 3 insertions(+), 26 deletions(-) diff --git 
a/PyTorchSimFrontend/extension_codecache.py b/PyTorchSimFrontend/extension_codecache.py index bddf2f0c..931b63e5 100644 --- a/PyTorchSimFrontend/extension_codecache.py +++ b/PyTorchSimFrontend/extension_codecache.py @@ -27,19 +27,6 @@ def dump_metadata(args, arg_attributes, path): file.write(f'{arg_name}=({arg_attribute[0]}, {arg.dtype}, {arg.shape})\n') return -def llvm_compile_command(input, output): - opt_output = f"{input[:-3]}_opt.ll" - return [re.sub(r"[ \n]+", " ", - f""" - {extension_config.CONFIG_TORCHSIM_LLVM_PATH}/opt --load-pass-plugin={extension_config.CONFIG_TORCHSIM_CUSTOM_PASS_PATH}/libLowerGemminiPass.so -S -march=riscv64 --passes=LowerGemminiPass {input} -o {opt_output} - """, - ).strip(), - re.sub(r"[ \n]+", " ", - f""" - {extension_config.CONFIG_TORCHSIM_LLVM_PATH}/llc -march=riscv64 -mattr=+m,+f,+d,+a,+c,+v -O2 {opt_output} -o {output} - """, - ).strip()] - def mlir_compile_command(filename, vectorlane_size, vlen=256): return [re.sub(r"[ \n]+", " ", f""" diff --git a/PyTorchSimFrontend/extension_config.py b/PyTorchSimFrontend/extension_config.py index 9b9381b2..7e101106 100644 --- a/PyTorchSimFrontend/extension_config.py +++ b/PyTorchSimFrontend/extension_config.py @@ -42,9 +42,6 @@ def __getattr__(name): # LLVM PATH if name == "CONFIG_TORCHSIM_LLVM_PATH": return os.environ.get('TORCHSIM_LLVM_PATH', default="/usr/bin") - if name == "CONFIG_TORCHSIM_CUSTOM_PASS_PATH": - return os.environ.get('TORCHSIM_CUSTOM_PASS_PATH', - default=f"{__getattr__('CONFIG_TORCHSIM_DIR')}/GemminiLowerPass/build") if name == "CONFIG_TORCHSIM_DUMP_MLIR_IR": return int(os.environ.get("TORCHSIM_DUMP_MLIR_IR", default=False)) if name == "CONFIG_TORCHSIM_DUMP_LLVM_IR": @@ -64,13 +61,6 @@ def __getattr__(name): # GEM5 config if name == "CONFIG_GEM5_PATH": return os.environ.get('GEM5_PATH', default="/workspace/gem5/build/RISCV/gem5.opt") - if name == "CONFIG_GEM5_SCRIPT_PATH": - return os.environ.get('GEM5_SCRIPT_PATH', - default=f"{__getattr__('CONFIG_TORCHSIM_DIR')}/gem5_script/script_systolic.py") - - # For block sparse - if name == "CONFIG_BLOCK_SPARSE": - return int(os.environ.get('BLOCK_SPARSE', default=0)) # Mapping Policy if name == "CONFIG_MAPPING_POLICY": @@ -105,6 +95,7 @@ def __getattr__(name): # Compiler Optimization if name == "CONFIG_COMPILER_OPTIMIZATION": return os.environ.get('TORCHSIM_COMPILER_OPTIMIZATION', default="all") # options: all, none, custom + # Advanced fusion options if name == "CONFIG_FUSION": return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "fusion" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False diff --git a/README.md b/README.md index dbfdf2e8..75e08889 100644 --- a/README.md +++ b/README.md @@ -333,8 +333,6 @@ export TORCHSIM_VECTOR_LANE=128 # vector lane size export TORCHSIM_VECTOR_LANE_STRIDE=2 # vector lane stride for DMA export TORCHSIM_DIR=/workspace/PyTorchSim # home directory -export BLOCK_SPARSE=0 # If you want to use block sparse workload, turn it on - # Plan which tensor allocated in TPUv4's CMEM export SRAM_BUFFER_PLAN_PATH=/workspace/PyTorchSim/tpuv4/gemm_plan.py diff --git a/Simulator/simulator.py b/Simulator/simulator.py index b16ee4cf..530cab8b 100644 --- a/Simulator/simulator.py +++ b/Simulator/simulator.py @@ -161,7 +161,8 @@ def show_progress(): print("") dir_path = os.path.join(os.path.dirname(target_binary), "m5out") - gem5_cmd = [extension_config.CONFIG_GEM5_PATH, "-r", "--stdout-file=sto.log", "-d", dir_path, extension_config.CONFIG_GEM5_SCRIPT_PATH, "-c", target_binary, "--vlane", str(vectorlane_size)] + 
gem5_script_path = os.path.join(extension_config.CONFIG_TORCHSIM_DIR, "gem5_script/script_systolic.py") + gem5_cmd = [extension_config.CONFIG_GEM5_PATH, "-r", "--stdout-file=sto.log", "-d", dir_path, gem5_script_path, "-c", target_binary, "--vlane", str(vectorlane_size)] try: # Create progress thread is_dryrun = int(os.environ.get('TOGSIM_DRYRUN', default=False)) or silent_mode From 075a5dc35f186a989c308f3f536e6f1019aa7c9c Mon Sep 17 00:00:00 2001 From: Yunseon Shin Date: Tue, 2 Dec 2025 15:53:28 +0000 Subject: [PATCH 08/21] [Refactor] print ramulator & booksim config --- TOGSim/include/Interconnect.h | 3 +++ TOGSim/include/Simulator.h | 1 + TOGSim/src/Interconnect.cc | 13 ++++++++++++- TOGSim/src/Simulator.cc | 5 ++++- 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/TOGSim/include/Interconnect.h b/TOGSim/include/Interconnect.h index e6b325d0..3f0aaff7 100644 --- a/TOGSim/include/Interconnect.h +++ b/TOGSim/include/Interconnect.h @@ -3,6 +3,8 @@ #include "DMA.h" #include "booksim2/Interconnect.hpp" #include +#include +#include #include namespace fs = std::filesystem; @@ -69,6 +71,7 @@ class Booksim2Interconnect : public Interconnect { virtual mem_fetch* top(uint32_t nid) override; virtual void pop(uint32_t nid) override; virtual void print_stats() override; + void print_config(std::string config_path); private: uint32_t _ctrl_size; diff --git a/TOGSim/include/Simulator.h b/TOGSim/include/Simulator.h index 4d9defd1..39fa310e 100644 --- a/TOGSim/include/Simulator.h +++ b/TOGSim/include/Simulator.h @@ -3,6 +3,7 @@ #include #include #include +#include #include "Common.h" #include "Core.h" #include "SparseCore.h" diff --git a/TOGSim/src/Interconnect.cc b/TOGSim/src/Interconnect.cc index ab2d5d89..096efe3d 100644 --- a/TOGSim/src/Interconnect.cc +++ b/TOGSim/src/Interconnect.cc @@ -83,11 +83,22 @@ Booksim2Interconnect::Booksim2Interconnect(SimulationConfig config) { std::string(onnxim_path_env) + "/TOGSim" : std::string("./"); _config_path = fs::path(onnxim_path).append("configs").append((std::string)config.icnt_config_path).string(); - spdlog::info("Config path : {}", _config_path); + spdlog::info("Booksim 2 config path : {}", _config_path); + print_config(_config_path); _booksim = std::make_unique(_config_path, _n_nodes); _ctrl_size = 8; } +void Booksim2Interconnect::print_config(std::string config_path) { + std::ifstream config_file(config_path); + std::string line; + spdlog::info("Booksim2 Configuration: "); + while (std::getline(config_file, line)) { + std::cout << line << std::endl; + } + config_file.close(); +} + bool Booksim2Interconnect::running() { return false; } diff --git a/TOGSim/src/Simulator.cc b/TOGSim/src/Simulator.cc index 41a2c7a5..fa47f23f 100644 --- a/TOGSim/src/Simulator.cc +++ b/TOGSim/src/Simulator.cc @@ -42,7 +42,10 @@ Simulator::Simulator(SimulationConfig config) .append("configs") .append(config.dram_config_path) .string(); - spdlog::info("[Config/DRAM] Ramulator2 config: {}", ramulator_config); + spdlog::info("[Config/DRAM] Ramulator2 config path: {}", ramulator_config); + YAML::Node dram_config = YAML::LoadFile(ramulator_config); + spdlog::info("Ramulator2 config: "); + std::cout << dram_config << std::endl; config.dram_config_path = ramulator_config; _dram = std::make_unique(config, &_core_cycles); } else { From bd1064d1dcab3ce6482d6199f1f3e7e5721a1417 Mon Sep 17 00:00:00 2001 From: Yunseon Shin Date: Tue, 2 Dec 2025 16:12:00 +0000 Subject: [PATCH 09/21] [Tutorial] session1 notebook --- PyTorchSimFrontend/extension_config.py | 2 +- 
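Beyond reshaping the notebook cells, this patch also renames the environment variable that selects the TOGSim hardware configuration from TORCHSIM_CONFIG to TOGSIM_CONFIG, while the attribute name CONFIG_TOGSIM_CONFIG stays the same. A minimal usage sketch under the new name; the config path assumes the default TORCHSIM_DIR of /workspace/PyTorchSim:

    import os

    # Point the simulator at the single-core TPUv3-like config.
    # This same file is the built-in default when TOGSIM_CONFIG is unset.
    base_dir = os.environ.get('TORCHSIM_DIR', '/workspace/PyTorchSim')
    os.environ['TOGSIM_CONFIG'] = (
        f"{base_dir}/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json"
    )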
tutorial/session1/CompilerOptimization.ipynb | 137 ++---------- tutorial/session1/DNNServing.ipynb | 44 +--- tutorial/session1/ExecutionMode.ipynb | 200 +++--------------- ...{HelloPyTorchSim.ipynb => Inference.ipynb} | 87 ++------ tutorial/session1/LogAnalysis.ipynb | 74 +------ tutorial/session1/Mapping.ipynb | 180 +++------------- tutorial/session1/Training.ipynb | 70 +----- 8 files changed, 110 insertions(+), 684 deletions(-) rename tutorial/session1/{HelloPyTorchSim.ipynb => Inference.ipynb} (51%) diff --git a/PyTorchSimFrontend/extension_config.py b/PyTorchSimFrontend/extension_config.py index 7e101106..7e9bde0e 100644 --- a/PyTorchSimFrontend/extension_config.py +++ b/PyTorchSimFrontend/extension_config.py @@ -49,7 +49,7 @@ def __getattr__(name): # TOGSim config if name == "CONFIG_TOGSIM_CONFIG": - return os.environ.get('TORCHSIM_CONFIG', + return os.environ.get('TOGSIM_CONFIG', default=f"{__getattr__('CONFIG_TORCHSIM_DIR')}/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json") if name == "CONFIG_TOGSIM_EAGER_MODE": return int(os.environ.get("TOGSIM_EAGER_MODE", default=False)) diff --git a/tutorial/session1/CompilerOptimization.ipynb b/tutorial/session1/CompilerOptimization.ipynb index dec4a383..6ea98cc7 100644 --- a/tutorial/session1/CompilerOptimization.ipynb +++ b/tutorial/session1/CompilerOptimization.ipynb @@ -9,16 +9,8 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T16:02:57.028461Z", - "iopub.status.busy": "2025-12-01T16:02:57.027718Z", - "iopub.status.idle": "2025-12-01T16:02:58.396341Z", - "shell.execute_reply": "2025-12-01T16:02:58.394939Z", - "shell.execute_reply.started": "2025-12-01T16:02:57.028437Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "import torch\n", @@ -34,46 +26,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### GeMM + ReLU fusion" + "### GeMM + ReLU fusion (Default)" ] }, { "cell_type": "code", - "execution_count": 2, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T16:02:58.403173Z", - "iopub.status.busy": "2025-12-01T16:02:58.403009Z", - "iopub.status.idle": "2025-12-01T16:03:17.426642Z", - "shell.execute_reply": "2025-12-01T16:03:17.425212Z", - "shell.execute_reply.started": "2025-12-01T16:02:58.403156Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", - "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/npu/build.ninja...\n", - "Building extension module npu...\n", - "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", - "Loading extension module npu...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ninja: no work to do.\n", - "Wrapper Codegen Path = /tmp/torchinductor_root/3z/c3zx4dfsx2o24goyevxgy4upevdsyxegbadiquz7z33ttsbs22a6.py\n", - "[Gem5] Gem5 is running... \n", - "[TOGSim] TOGSim is running. 
\n", - "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/5o2xythi5z3/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/5o2xythi5z3/togsim_result/0\"\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ + "os.environ['TORCHSIM_DUMP_PATH']=os.path.join(base_dir, \"fused\")\n", "from Scheduler.scheduler import PyTorchSimRunner\n", "device = PyTorchSimRunner.setup_device().custom_device()\n", "\n", @@ -89,27 +51,11 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T16:03:17.428533Z", - "iopub.status.busy": "2025-12-01T16:03:17.428158Z", - "iopub.status.idle": "2025-12-01T16:03:17.637165Z", - "shell.execute_reply": "2025-12-01T16:03:17.635970Z", - "shell.execute_reply.started": "2025-12-01T16:03:17.428513Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2025-12-01 16:03:17.222] [info] Total execution cycles: 51715\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ - "!cat /root/workspace/PyTorchSim/tmp/5o2xythi5z3/togsim_result/0 | grep \"Total execution cycle\"" + "!cat /root/workspace/PyTorchSim/outputs/20251202_060538/togsim_result.log | grep \"Total execution cycle\"" ] }, { @@ -121,32 +67,11 @@ }, { "cell_type": "code", - "execution_count": 4, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T16:03:17.638828Z", - "iopub.status.busy": "2025-12-01T16:03:17.638591Z", - "iopub.status.idle": "2025-12-01T16:03:30.063552Z", - "shell.execute_reply": "2025-12-01T16:03:30.062137Z", - "shell.execute_reply.started": "2025-12-01T16:03:17.638808Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Wrapper Codegen Path = /tmp/torchinductor_root/qi/cqi2yw3qrkfpe7v2zwm2hydottmvrhqqzqmmhaen7ozcvkkcrvro.py\n", - "[Gem5] Gem5 is running... \n", - "[Gem5] Gem5 is running.. \n", - "[TOGSim] TOGSim is running. \n", - "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/x2ueokr7kg3/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/x2ueokr7kg3/togsim_result/0\"\n", - "[TOGSim] TOGSim is running... 
\n", - "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/37dfo4nczcq/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/37dfo4nczcq/togsim_result/0\"\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ + "os.environ['TORCHSIM_DUMP_PATH']=os.path.join(base_dir, \"non_fused\")\n", "os.environ['TORCHSIM_COMPILER_OPTIMIZATION']=\"none\"\n", "\n", "input = torch.randn(1024, 1024).to(device=device)\n", @@ -159,39 +84,15 @@ "out = opt_fn(input, weight)" ] }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T16:03:51.827713Z", - "iopub.status.busy": "2025-12-01T16:03:51.827336Z", - "iopub.status.idle": "2025-12-01T16:03:52.225778Z", - "shell.execute_reply": "2025-12-01T16:03:52.224365Z", - "shell.execute_reply.started": "2025-12-01T16:03:51.827690Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2025-12-01 16:03:27.953] [info] Total execution cycles: 49999\n", - "[2025-12-01 16:03:29.117] [info] Total execution cycles: 66093\n" - ] - } - ], - "source": [ - "!cat /root/workspace/PyTorchSim/tmp/x2ueokr7kg3/togsim_result/0 | grep \"Total execution cycle\"\n", - "!cat /root/workspace/PyTorchSim/tmp/37dfo4nczcq/togsim_result/0 | grep \"Total execution cycle\"" - ] - }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "!cat /root/workspace/PyTorchSim/outputs/20251202_055530/togsim_result.log | grep \"Total execution cycle\"\n", + "!cat /root/workspace/PyTorchSim/outputs/20251202_055532/togsim_result.log | grep \"Total execution cycle\"" + ] } ], "metadata": { diff --git a/tutorial/session1/DNNServing.ipynb b/tutorial/session1/DNNServing.ipynb index 3067822b..b38bfe6a 100644 --- a/tutorial/session1/DNNServing.ipynb +++ b/tutorial/session1/DNNServing.ipynb @@ -9,16 +9,8 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T16:01:10.802611Z", - "iopub.status.busy": "2025-12-01T16:01:10.802338Z", - "iopub.status.idle": "2025-12-01T16:01:12.056183Z", - "shell.execute_reply": "2025-12-01T16:01:12.055040Z", - "shell.execute_reply.started": "2025-12-01T16:01:10.802591Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "import torch\n", @@ -37,36 +29,16 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T16:01:12.057637Z", - "iopub.status.busy": "2025-12-01T16:01:12.057366Z", - "iopub.status.idle": "2025-12-01T16:01:13.474161Z", - "shell.execute_reply": "2025-12-01T16:01:13.472936Z", - "shell.execute_reply.started": "2025-12-01T16:01:12.057620Z" - } - }, - "outputs": [ - { - "ename": "TypeError", - "evalue": "Scheduler.__init__() got an unexpected keyword argument 'backend_config'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[2], line 6\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mScheduler\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mscheduler\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Scheduler, SchedulerDNNModel, Request\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m 
\u001b[38;5;21;01mPyTorchSimFrontend\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mextension_config\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m CONFIG_TORCHSIM_BACKEND_CONFIG\n\u001b[0;32m----> 6\u001b[0m scheduler \u001b[38;5;241m=\u001b[39m \u001b[43mScheduler\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnum_request_queue\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mengine_select\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mScheduler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mFIFO_ENGINE\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbackend_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mCONFIG_TORCHSIM_BACKEND_CONFIG\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 7\u001b[0m device \u001b[38;5;241m=\u001b[39m scheduler\u001b[38;5;241m.\u001b[39mexecution_engine\u001b[38;5;241m.\u001b[39mmodule\u001b[38;5;241m.\u001b[39mcustom_device()\n\u001b[1;32m 9\u001b[0m model \u001b[38;5;241m=\u001b[39m resnet18()\u001b[38;5;241m.\u001b[39meval()\n", - "\u001b[0;31mTypeError\u001b[0m: Scheduler.__init__() got an unexpected keyword argument 'backend_config'" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "import torch\n", "from torchvision.models import resnet18\n", "from Scheduler.scheduler import Scheduler, SchedulerDNNModel, Request\n", "from PyTorchSimFrontend import extension_config\n", "\n", - "scheduler = Scheduler(num_request_queue=1, engine_select=Scheduler.FIFO_ENGINE, togsim_config=extension_config.CONFIG_TOGSIM_CONFIG)\n", + "scheduler = Scheduler(num_request_queue=1, engine_select=Scheduler.FIFO_ENGINE, togsim_config=extension_config.TOGSIM_CONFIG)\n", "device = scheduler.execution_engine.module.custom_device()\n", "\n", "model = resnet18().eval()\n", @@ -103,7 +75,7 @@ "from torchvision.models import resnet18\n", "\n", "from Scheduler.scheduler import Scheduler, SchedulerDNNModel, Request, poisson_request_generator\n", - "CONFIG_TORCHSIM_DIR = os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim')\n", + "TORCHSIM_DIR = os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim')\n", "\n", "lambda_requests = 10\n", "max_time = 30\n", @@ -111,7 +83,7 @@ "target_model1 = resnet18().eval()\n", "\n", "# Init scheduler\n", - "scheduler = Scheduler(num_request_queue=1, max_batch=32, engine_select=Scheduler.FIFO_ENGINE, backend_config=f\"{CONFIG_TORCHSIM_DIR}/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv3.json\")\n", + "scheduler = Scheduler(num_request_queue=1, max_batch=32, engine_select=Scheduler.FIFO_ENGINE, togsim_config=extension_config.TOGSIM_CONFIG)\n", "# Register compiled model\n", "opt_model1 = torch.compile(target_model1.to(device=scheduler.execution_engine.module.custom_device(), memory_format=torch.channels_last), dynamic=False)\n", "SchedulerDNNModel.register_model(\"resnet18\", opt_model1)\n", diff --git a/tutorial/session1/ExecutionMode.ipynb b/tutorial/session1/ExecutionMode.ipynb index bebf7951..d1b1aadf 100644 --- a/tutorial/session1/ExecutionMode.ipynb +++ b/tutorial/session1/ExecutionMode.ipynb @@ -9,16 +9,8 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T15:07:50.977314Z", - "iopub.status.busy": "2025-12-01T15:07:50.976976Z", - "iopub.status.idle": "2025-12-01T15:07:52.257401Z", - "shell.execute_reply": "2025-12-01T15:07:52.256027Z", - "shell.execute_reply.started": "2025-12-01T15:07:50.977284Z" - } - 
}, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "import torch\n", @@ -37,41 +29,9 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T15:07:56.915608Z", - "iopub.status.busy": "2025-12-01T15:07:56.915105Z", - "iopub.status.idle": "2025-12-01T15:08:16.513755Z", - "shell.execute_reply": "2025-12-01T15:08:16.512040Z", - "shell.execute_reply.started": "2025-12-01T15:07:56.915587Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", - "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/npu/build.ninja...\n", - "Building extension module npu...\n", - "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", - "Loading extension module npu...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ninja: no work to do.\n", - "Wrapper Codegen Path = /tmp/torchinductor_root/yr/cyrl4zqohiglmrez32dmaijhd3sfdh4xabea5splhxwtwckiykpr.py\n", - "[Gem5] Gem5 is running.. \n", - "[Spike] Running Spike simulator\n", - "[TOGSim] TOGSim is running. \n", - "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/togsim_result/3\"\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from Scheduler.scheduler import PyTorchSimRunner\n", "device = PyTorchSimRunner.setup_device().custom_device()\n", @@ -95,25 +55,9 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T15:08:16.515946Z", - "iopub.status.busy": "2025-12-01T15:08:16.515491Z", - "iopub.status.idle": "2025-12-01T15:08:24.623667Z", - "shell.execute_reply": "2025-12-01T15:08:24.622036Z", - "shell.execute_reply.started": "2025-12-01T15:08:16.515923Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Spike] Running Spike simulator\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "os.environ['TORCHSIM_FUNCTIONAL_MODE']=\"1\"\n", "os.environ['TORCHSIM_TIMING_MODE']=\"0\"\n", @@ -134,26 +78,9 @@ }, { "cell_type": "code", - "execution_count": 4, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T15:08:24.625148Z", - "iopub.status.busy": "2025-12-01T15:08:24.624921Z", - "iopub.status.idle": "2025-12-01T15:08:27.657175Z", - "shell.execute_reply": "2025-12-01T15:08:27.655628Z", - "shell.execute_reply.started": "2025-12-01T15:08:24.625129Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[TOGSim] TOGSim is running. 
\n", - "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/togsim_result/4\"\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "os.environ['TORCHSIM_FUNCTIONAL_MODE']=\"0\"\n", "os.environ['TORCHSIM_TIMING_MODE']=\"1\"\n", @@ -175,30 +102,11 @@ }, { "cell_type": "code", - "execution_count": 5, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T15:08:27.658146Z", - "iopub.status.busy": "2025-12-01T15:08:27.657941Z", - "iopub.status.idle": "2025-12-01T15:08:59.079718Z", - "shell.execute_reply": "2025-12-01T15:08:59.078585Z", - "shell.execute_reply.started": "2025-12-01T15:08:27.658128Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Wrapper Codegen Path = /tmp/torchinductor_root/m6/cm63zhmgb7n2askwt37lf72xuvbgpk6uvtmexreuxosqt3g5466s.py\n", - "[Gem5] Gem5 is running.. \n", - "[TOGSim] TOGSim is running. \n", - "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/hb6bvrfkqds/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/hb6bvrfkqds/togsim_result/0\"\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ - "os.environ['TORCHSIM_CONFIG']=f\"{base_dir}/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json\"\n", + "os.environ['TOGSIM_CONFIG']=f\"{base_dir}/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json\"\n", "\n", "input = torch.randn(2048, 2048).to(device=device)\n", "weight = torch.randn(2048, 2048).to(device=device)\n", @@ -209,27 +117,11 @@ }, { "cell_type": "code", - "execution_count": 6, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T15:09:46.743678Z", - "iopub.status.busy": "2025-12-01T15:09:46.743216Z", - "iopub.status.idle": "2025-12-01T15:09:46.967813Z", - "shell.execute_reply": "2025-12-01T15:09:46.966078Z", - "shell.execute_reply.started": "2025-12-01T15:09:46.743653Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2025-12-01 15:08:58.162] [info] Total execution cycles: 299113\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ - "!cat /root/workspace/PyTorchSim/tmp/hb6bvrfkqds/togsim_result/0 | grep \"Total execution cycle\"" + "!cat /root/workspace/PyTorchSim/outputs/20251202_160520/togsim_result.log | grep \"Total execution cycle\"" ] }, { @@ -241,28 +133,11 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T15:09:57.045456Z", - "iopub.status.busy": "2025-12-01T15:09:57.044956Z", - "iopub.status.idle": "2025-12-01T15:10:23.158900Z", - "shell.execute_reply": "2025-12-01T15:10:23.157486Z", - "shell.execute_reply.started": "2025-12-01T15:09:57.045420Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[TOGSim] TOGSim is running... 
\n", - "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/hb6bvrfkqds/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/hb6bvrfkqds/togsim_result/1\"\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ - "os.environ['TORCHSIM_CONFIG']=f\"{base_dir}/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv3.json\"\n", + "os.environ['TOGSIM_CONFIG']=f\"{base_dir}/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv3.json\"\n", "\n", "input = torch.randn(2048, 2048).to(device=device)\n", "weight = torch.randn(2048, 2048).to(device=device)\n", @@ -271,37 +146,14 @@ "npu_out = opt_fn(input, weight)" ] }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T15:10:25.410879Z", - "iopub.status.busy": "2025-12-01T15:10:25.410424Z", - "iopub.status.idle": "2025-12-01T15:10:25.626504Z", - "shell.execute_reply": "2025-12-01T15:10:25.625016Z", - "shell.execute_reply.started": "2025-12-01T15:10:25.410844Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2025-12-01 15:10:22.824] [info] Total execution cycles: 167394\n" - ] - } - ], - "source": [ - "!cat /root/workspace/PyTorchSim/tmp/hb6bvrfkqds/togsim_result/1 | grep \"Total execution cycle\"" - ] - }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "!cat /root/workspace/PyTorchSim/outputs/20251202_160547/togsim_result.log | grep \"Total execution cycle\"" + ] }, { "cell_type": "code", diff --git a/tutorial/session1/HelloPyTorchSim.ipynb b/tutorial/session1/Inference.ipynb similarity index 51% rename from tutorial/session1/HelloPyTorchSim.ipynb rename to tutorial/session1/Inference.ipynb index be7acafa..a49e2440 100644 --- a/tutorial/session1/HelloPyTorchSim.ipynb +++ b/tutorial/session1/Inference.ipynb @@ -4,21 +4,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Hello, PyTorchSim!" 
+ "# Basic usage\n", + "## Inference" ] }, { "cell_type": "code", - "execution_count": 8, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T15:00:24.548912Z", - "iopub.status.busy": "2025-12-01T15:00:24.548584Z", - "iopub.status.idle": "2025-12-01T15:00:24.552810Z", - "shell.execute_reply": "2025-12-01T15:00:24.551884Z", - "shell.execute_reply.started": "2025-12-01T15:00:24.548889Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "import torch\n", @@ -32,22 +25,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## One Touch Simulation\n", "### Normal Matmul Code" ] }, { "cell_type": "code", - "execution_count": 9, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T15:00:26.339225Z", - "iopub.status.busy": "2025-12-01T15:00:26.338820Z", - "iopub.status.idle": "2025-12-01T15:00:26.376456Z", - "shell.execute_reply": "2025-12-01T15:00:26.375714Z", - "shell.execute_reply.started": "2025-12-01T15:00:26.339187Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", @@ -69,36 +53,9 @@ }, { "cell_type": "code", - "execution_count": 11, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T15:00:55.016492Z", - "iopub.status.busy": "2025-12-01T15:00:55.016117Z", - "iopub.status.idle": "2025-12-01T15:00:56.241124Z", - "shell.execute_reply": "2025-12-01T15:00:56.239891Z", - "shell.execute_reply.started": "2025-12-01T15:00:55.016471Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", - "No modifications detected for re-loaded extension module npu, skipping build step...\n", - "Loading extension module npu...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Spike] Running Spike simulator\n", - "[TOGSim] TOGSim is running.. 
\n", - "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/fy6nnyudtno/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/fy6nnyudtno/togsim_result/3\"\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from Scheduler.scheduler import PyTorchSimRunner\n", "device = PyTorchSimRunner.setup_device().custom_device()\n", @@ -113,16 +70,8 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T14:23:02.977576Z", - "iopub.status.busy": "2025-12-01T14:23:02.977359Z", - "iopub.status.idle": "2025-12-01T14:23:02.982914Z", - "shell.execute_reply": "2025-12-01T14:23:02.981939Z", - "shell.execute_reply.started": "2025-12-01T14:23:02.977557Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "def test_result(name, npu_out, cpu_out, rtol=1e-4, atol=1e-4):\n", @@ -143,19 +92,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--------------------\n", - "|MatMul Test Passed|\n", - "--------------------\n" - ] - } - ], + "outputs": [], "source": [ "test_result(\"MatMul\", npu_out, cpu_out)" ] diff --git a/tutorial/session1/LogAnalysis.ipynb b/tutorial/session1/LogAnalysis.ipynb index 70b2418c..24732dda 100644 --- a/tutorial/session1/LogAnalysis.ipynb +++ b/tutorial/session1/LogAnalysis.ipynb @@ -9,16 +9,8 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T15:02:31.262456Z", - "iopub.status.busy": "2025-12-01T15:02:31.262104Z", - "iopub.status.idle": "2025-12-01T15:02:32.582082Z", - "shell.execute_reply": "2025-12-01T15:02:32.580928Z", - "shell.execute_reply.started": "2025-12-01T15:02:31.262435Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "import torch\n", @@ -39,40 +31,9 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T15:02:32.583264Z", - "iopub.status.busy": "2025-12-01T15:02:32.582996Z", - "iopub.status.idle": "2025-12-01T15:02:44.433624Z", - "shell.execute_reply": "2025-12-01T15:02:44.432023Z", - "shell.execute_reply.started": "2025-12-01T15:02:32.583247Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", - "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/npu/build.ninja...\n", - "Building extension module npu...\n", - "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", - "Loading extension module npu...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ninja: no work to do.\n", - "Wrapper Codegen Path = /tmp/torchinductor_root/yr/cyrl4zqohiglmrez32dmaijhd3sfdh4xabea5splhxwtwckiykpr.py\n", - "[Gem5] Gem5 is running.. \n", - "[TOGSim] TOGSim is running. 
\n", - "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/togsim_result/0\"\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from Scheduler.scheduler import PyTorchSimRunner\n", "device = PyTorchSimRunner.setup_device().custom_device()\n", @@ -93,28 +54,11 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T15:02:44.434717Z", - "iopub.status.busy": "2025-12-01T15:02:44.434416Z", - "iopub.status.idle": "2025-12-01T15:02:47.466577Z", - "shell.execute_reply": "2025-12-01T15:02:47.464858Z", - "shell.execute_reply.started": "2025-12-01T15:02:44.434698Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[TOGSim] TOGSim is running. \n", - "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/togsim_result/1\"\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ - "os.environ['BACKENDSIM_DEBUG_LEVEL']=\"trace\"\n", + "os.environ['TOGSIM_DEBUG_LEVEL']=\"trace\"\n", "\n", "input = torch.randn(1024, 1024).to(device=device)\n", "weight = torch.randn(1024, 1024).to(device=device)\n", diff --git a/tutorial/session1/Mapping.ipynb b/tutorial/session1/Mapping.ipynb index 15b1a1c1..b29825f7 100644 --- a/tutorial/session1/Mapping.ipynb +++ b/tutorial/session1/Mapping.ipynb @@ -9,16 +9,8 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T15:47:11.380236Z", - "iopub.status.busy": "2025-12-01T15:47:11.380048Z", - "iopub.status.idle": "2025-12-01T15:47:12.604801Z", - "shell.execute_reply": "2025-12-01T15:47:12.604050Z", - "shell.execute_reply.started": "2025-12-01T15:47:11.380218Z" - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "import torch\n", @@ -37,41 +29,9 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T15:47:12.605523Z", - "iopub.status.busy": "2025-12-01T15:47:12.605335Z", - "iopub.status.idle": "2025-12-01T15:47:31.694085Z", - "shell.execute_reply": "2025-12-01T15:47:31.693144Z", - "shell.execute_reply.started": "2025-12-01T15:47:12.605510Z" - } - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", - "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/npu/build.ninja...\n", - "Building extension module npu...\n", - "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", - "Loading extension module npu...\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ninja: no work to do.\n", - "Wrapper Codegen Path = /tmp/torchinductor_root/yr/cyrl4zqohiglmrez32dmaijhd3sfdh4xabea5splhxwtwckiykpr.py\n", - "[Gem5] Gem5 is running.. \n", - "[Spike] Running Spike simulator\n", - "[TOGSim] TOGSim is running. 
\n", - "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/togsim_result/2\"\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "from Scheduler.scheduler import PyTorchSimRunner\n", "device = PyTorchSimRunner.setup_device().custom_device()\n", @@ -85,27 +45,11 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T15:47:31.695314Z", - "iopub.status.busy": "2025-12-01T15:47:31.695049Z", - "iopub.status.idle": "2025-12-01T15:47:31.893585Z", - "shell.execute_reply": "2025-12-01T15:47:31.892528Z", - "shell.execute_reply.started": "2025-12-01T15:47:31.695296Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2025-12-01 15:44:22.862] [info] Total execution cycles: 48536\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ - "!cat /root/workspace/PyTorchSim/tmp/4q4qv6gbpia/togsim_result/0 | grep \"Total execution cycle\"" + "!cat /root/workspace/PyTorchSim/outputs/20251202_154524/togsim_result.log | grep \"Total execution cycle\"" ] }, { @@ -118,33 +62,13 @@ }, { "cell_type": "code", - "execution_count": 4, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T15:47:31.894500Z", - "iopub.status.busy": "2025-12-01T15:47:31.894332Z", - "iopub.status.idle": "2025-12-01T15:47:54.385682Z", - "shell.execute_reply": "2025-12-01T15:47:54.384358Z", - "shell.execute_reply.started": "2025-12-01T15:47:31.894485Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Wrapper Codegen Path = /tmp/torchinductor_root/dg/cdgpndctsltydpvcgfyntgkgfmw6dp4a7vruwmjhxzilxzvyewby.py\n", - "[Gem5] Gem5 is running. \n", - "[Spike] Running Spike simulator\n", - "[TOGSim] TOGSim is running... 
\n", - "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/75hiq5mugpq/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/75hiq5mugpq/togsim_result/1\"\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "torch._dynamo.reset()\n", "\n", - "os.environ['TORCHSIM_MANUAL_TILE_SIZE']=\"1\"\n", + "os.environ['TORCHSIM_MAPPING_POLICY']=\"manual\"\n", "os.environ['TORCHSIM_TILE_M']=\"512\"\n", "os.environ['TORCHSIM_TILE_N']=\"512\"\n", "os.environ['TORCHSIM_TILE_K']=\"512\"\n", @@ -158,27 +82,11 @@ }, { "cell_type": "code", - "execution_count": 5, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T15:47:54.386633Z", - "iopub.status.busy": "2025-12-01T15:47:54.386431Z", - "iopub.status.idle": "2025-12-01T15:47:54.585477Z", - "shell.execute_reply": "2025-12-01T15:47:54.584126Z", - "shell.execute_reply.started": "2025-12-01T15:47:54.386615Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2025-12-01 15:44:47.602] [info] Total execution cycles: 53419\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ - "!cat /root/workspace/PyTorchSim/tmp/75hiq5mugpq/togsim_result/0 | grep \"Total execution cycle\"" + "!cat /root/workspace/PyTorchSim/outputs/20251202_141933/togsim_result.log | grep \"Total execution cycle\"" ] }, { @@ -190,33 +98,13 @@ }, { "cell_type": "code", - "execution_count": 6, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T15:47:54.587092Z", - "iopub.status.busy": "2025-12-01T15:47:54.586748Z", - "iopub.status.idle": "2025-12-01T15:48:12.586081Z", - "shell.execute_reply": "2025-12-01T15:48:12.584431Z", - "shell.execute_reply.started": "2025-12-01T15:47:54.587070Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Wrapper Codegen Path = /tmp/torchinductor_root/gg/cgg5mmgs4wa3u5od6mgxuausnbthyzfsnnauqtszfuq7qvl2qrj2.py\n", - "[Gem5] Gem5 is running.. \n", - "[Spike] Running Spike simulator\n", - "[TOGSim] TOGSim is running. 
\n", - "[TOGSim] Simulation of \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/tile_graph.onnx\" is stored to \"/root/workspace/PyTorchSim/tmp/4q4qv6gbpia/togsim_result/3\"\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "torch._dynamo.reset()\n", - "os.environ['TORCHSIM_MANUAL_TILE_SIZE']=\"0\"\n", - "os.environ['AUTOTUNE_TEMPLATE']=\"1\"\n", + "\n", + "os.environ['TORCHSIM_MAPPING_POLICY']=\"heuristic\"\n", "\n", "input = torch.randn(1024, 1024).to(device=device)\n", "weight = torch.randn(1024, 1024).to(device=device)\n", @@ -227,27 +115,11 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": { - "execution": { - "iopub.execute_input": "2025-12-01T15:48:12.587335Z", - "iopub.status.busy": "2025-12-01T15:48:12.586969Z", - "iopub.status.idle": "2025-12-01T15:48:12.795718Z", - "shell.execute_reply": "2025-12-01T15:48:12.794347Z", - "shell.execute_reply.started": "2025-12-01T15:48:12.587305Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[2025-12-01 15:45:20.498] [info] Total execution cycles: 48564\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ - "!cat /root/workspace/PyTorchSim/tmp/4q4qv6gbpia/togsim_result/1 | grep \"Total execution cycle\"" + "!cat /root/workspace/PyTorchSim/outputs/20251202_141951/togsim_result.log | grep \"Total execution cycle\"" ] }, { diff --git a/tutorial/session1/Training.ipynb b/tutorial/session1/Training.ipynb index 9c39cadd..0c6b138a 100644 --- a/tutorial/session1/Training.ipynb +++ b/tutorial/session1/Training.ipynb @@ -9,34 +9,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using /root/.cache/torch_extensions/py310_cu121 as PyTorch extensions root...\n", - "Emitting ninja build file /root/.cache/torch_extensions/py310_cu121/npu/build.ninja...\n", - "Building extension module npu...\n", - "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "ninja: no work to do.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Loading extension module npu...\n" - ] - } - ], + "outputs": [], "source": [ "import os\n", "import sys\n", @@ -58,7 +33,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -86,25 +61,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[Spike] Running Spike simulator\n", - "[TOGSim] TOGSim is running.. \n", - "[TOGSim] Simulation of \"/tmp/torchinductor/tmp/fy6nnyudtno/tile_graph.onnx\" is stored to \"/tmp/torchinductor/tmp/fy6nnyudtno/togsim_result/16\"\n", - "[Spike] Running Spike simulator\n", - "[TOGSim] TOGSim is running.. \n", - "[TOGSim] Simulation of \"/tmp/torchinductor/tmp/vaymr3umaez/tile_graph.onnx\" is stored to \"/tmp/torchinductor/tmp/vaymr3umaez/togsim_result/2\"\n", - "[Spike] Running Spike simulator\n", - "[TOGSim] TOGSim is running.. 
\n", - "[TOGSim] Simulation of \"/tmp/torchinductor/tmp/jspabiga5jh/tile_graph.onnx\" is stored to \"/tmp/torchinductor/tmp/jspabiga5jh/togsim_result/2\"\n" - ] - } - ], + "outputs": [], "source": [ "torch.manual_seed(0)\n", "npu_input = torch.randn(128, 128).to(npu_device)\n", @@ -123,7 +82,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -145,22 +104,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "-------------------------------\n", - "|MatMul Input Grad Test Passed|\n", - "-------------------------------\n", - "--------------------------------\n", - "|MatMul Weight Grad Test Passed|\n", - "--------------------------------\n" - ] - } - ], + "outputs": [], "source": [ "test_result(\"MatMul Input Grad\", npu_input.grad, cpu_input.grad)\n", "test_result(\"MatMul Weight Grad\", npu_weight.grad, cpu_weight.grad)" From ac0ba7fa60156ff6c51b22b954f339484594262c Mon Sep 17 00:00:00 2001 From: Wonhyuk Yang Date: Tue, 2 Dec 2025 16:17:14 +0000 Subject: [PATCH 10/21] [Refactor] Remove DRY_RUN env --- PyTorchSimFrontend/extension_codecache.py | 4 +--- PyTorchSimFrontend/extension_config.py | 2 -- PyTorchSimFrontend/extension_op.py | 2 +- Scheduler/scheduler.py | 1 - Simulator/simulator.py | 4 ++-- TOGSim/src/main.cc | 2 +- 6 files changed, 5 insertions(+), 10 deletions(-) diff --git a/PyTorchSimFrontend/extension_codecache.py b/PyTorchSimFrontend/extension_codecache.py index 931b63e5..8c60df67 100644 --- a/PyTorchSimFrontend/extension_codecache.py +++ b/PyTorchSimFrontend/extension_codecache.py @@ -301,8 +301,6 @@ def dryrun_simulator(*args, **kwargs): # Dump arguments and meta data dump_metadata(args, arg_attributes, result_path) runtime_path = FunctionalSimulator.get_runtime_dump_path(result_path) - if not extension_config.CONFIG_TORCHSIM_TIMING_MODE: - return # Todo. 
Support valude dependent mode for graph mode if False: # extension_config.CONFIG_TORCHSIM_FUNCTIONAL_MODE: @@ -313,7 +311,7 @@ def dryrun_simulator(*args, **kwargs): cleanup=extension_config.CONFIG_CLEANUP_DUMP_ARGS) return result_path, runtime_path, None - is_dryrun = int(os.environ.get('TOGSIM_DRYRUN', default=False)) and not autotune + is_dryrun = int(os.environ.get('TOGSIM_EAGER_MODE', default=False)) and not autotune target_simulator = dryrun_simulator if is_dryrun else dummy_simulator target_simulator.arg_attributes = arg_attributes target_simulator.future = future diff --git a/PyTorchSimFrontend/extension_config.py b/PyTorchSimFrontend/extension_config.py index 7e9bde0e..af1e91d3 100644 --- a/PyTorchSimFrontend/extension_config.py +++ b/PyTorchSimFrontend/extension_config.py @@ -53,8 +53,6 @@ def __getattr__(name): default=f"{__getattr__('CONFIG_TORCHSIM_DIR')}/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json") if name == "CONFIG_TOGSIM_EAGER_MODE": return int(os.environ.get("TOGSIM_EAGER_MODE", default=False)) - if name == "CONFIG_TOGSIM_DRYRUN": - return int(os.environ.get('TOGSIM_DRYRUN', default=False)) if name == "CONFIG_TOGSIM_DEBUG_LEVEL": return os.environ.get("TOGSIM_DEBUG_LEVEL", "") diff --git a/PyTorchSimFrontend/extension_op.py b/PyTorchSimFrontend/extension_op.py index f0d0f20e..517773c1 100644 --- a/PyTorchSimFrontend/extension_op.py +++ b/PyTorchSimFrontend/extension_op.py @@ -46,7 +46,7 @@ class MLIRExternKernelChoice(ExternKernelChoice): def call_name(self): - is_dryrun = int(os.environ.get('TOGSIM_DRYRUN', default=False)) + is_dryrun = int(os.environ.get('TOGSIM_EAGER_MODE', default=False)) if is_dryrun: return f"yield from sparse_mm_dummy_stonne_outer" return f"torch.ops.extension_op.{self.name}" diff --git a/Scheduler/scheduler.py b/Scheduler/scheduler.py index 3bf28016..ffe8e4fc 100644 --- a/Scheduler/scheduler.py +++ b/Scheduler/scheduler.py @@ -159,7 +159,6 @@ def __init__(self, tog_simulator : TOGSimulator, num_partion=1) -> None: self.tog_simulator = tog_simulator # Dry run for compile and create generator - os.environ["TOGSIM_DRYRUN"] = "1" os.environ["TOGSIM_EAGER_MODE"] = "1" @staticmethod diff --git a/Simulator/simulator.py b/Simulator/simulator.py index 530cab8b..464c42c1 100644 --- a/Simulator/simulator.py +++ b/Simulator/simulator.py @@ -165,7 +165,7 @@ def show_progress(): gem5_cmd = [extension_config.CONFIG_GEM5_PATH, "-r", "--stdout-file=sto.log", "-d", dir_path, gem5_script_path, "-c", target_binary, "--vlane", str(vectorlane_size)] try: # Create progress thread - is_dryrun = int(os.environ.get('TOGSIM_DRYRUN', default=False)) or silent_mode + is_dryrun = int(os.environ.get('TOGSIM_EAGER_MODE', default=False)) or silent_mode if not is_dryrun: if extension_config.CONFIG_DEBUG_MODE: print("[Gem5] cmd> ", " ".join(gem5_cmd)) @@ -289,7 +289,7 @@ def wait(self): def send_command(self, command): if self.process: try: - if not extension_config.CONFIG_TOGSIM_DRYRUN: + if extension_config.CONFIG_TORCHSIM_DEBUG_MODE: print(command, flush=True) self.process.stdin.write(command + '\n') self.process.stdin.flush() diff --git a/TOGSim/src/main.cc b/TOGSim/src/main.cc index 1af11257..77c1bae7 100644 --- a/TOGSim/src/main.cc +++ b/TOGSim/src/main.cc @@ -9,7 +9,7 @@ namespace fs = std::filesystem; namespace po = boost::program_options; -const char* env_value = std::getenv("TOGSIM_DRYRUN"); +const char* env_value = std::getenv("TOGSIM_EAGER_MODE"); bool isDryRun = (env_value != nullptr && std::string(env_value) == "1"); void launchKernel(Simulator* 
simulator, std::string onnx_path, std::string attribute_path, std::string config_path, cycle_type request_time=0, int partiton_id=0) { From dac3b592d4dbb792f6dc5b06710f155e2b17d575 Mon Sep 17 00:00:00 2001 From: Wonhyuk Yang Date: Tue, 2 Dec 2025 18:43:47 +0000 Subject: [PATCH 11/21] [Refactor] Move configs folder to top-level folder --- PyTorchSimFrontend/extension_config.py | 2 +- PyTorchSimFrontend/extension_op.py | 2 +- README.md | 6 +++--- Simulator/simulator.py | 2 +- .../booksim2_configs/anynet.icnt | 0 .../booksim2_configs/anynet_file | 0 .../booksim2_configs/chiplet_32_32_2.icnt | 2 +- .../booksim2_configs/chiplet_32_32_2.net | 0 .../booksim2_configs/fly_c16_m16.icnt | 0 .../booksim2_configs/fly_c16_m32.icnt | 0 .../booksim2_configs/fly_c16_m8.icnt | 0 .../booksim2_configs/fly_c1_m1.icnt | 0 configs/booksim2_configs/fly_c1_m16.icnt | 18 ++++++++++++++++++ .../booksim2_configs/fly_c1_m2.icnt | 0 .../booksim2_configs/fly_c1_m8.icnt | 0 .../booksim2_configs/fly_c2_m32.icnt | 0 .../booksim2_configs/fly_c2_m8.icnt | 0 .../booksim2_configs/fly_c32_m32.icnt | 0 .../booksim2_configs/fly_c32_m4.icnt | 0 .../booksim2_configs/fly_c32_m8.icnt | 0 configs/booksim2_configs/fly_c4_m16.icnt | 17 +++++++++++++++++ .../booksim2_configs/fly_c4_m2.icnt | 0 .../booksim2_configs/fly_c4_m32.icnt | 0 .../booksim2_configs/fly_c4_m8.icnt | 0 .../booksim2_configs/fly_c64_m8.icnt | 0 .../booksim2_configs/fly_c64_m8_sif-age.icnt | 0 .../booksim2_configs/fly_c64_m8_sif-rr.icnt | 0 configs/booksim2_configs/fly_c8_m16.icnt | 17 +++++++++++++++++ .../booksim2_configs/make_anynet_topology.py | 0 .../booksim2_configs/mesh_sif-age.icnt | 0 .../booksim2_configs/mesh_sif-rr.icnt | 0 .../heterogeneous_c2_simple_noc.json | 0 .../ramulator2_configs/DDR4.yaml | 0 .../ramulator2_configs/HBM2.yaml | 0 .../ramulator2_configs/HBM2_TPUv3.yaml | 0 .../ramulator_configs/ALDRAM-config.cfg | 0 .../ramulator_configs/DDR3-config.cfg | 0 .../ramulator_configs/DDR4-config.cfg | 0 .../ramulator_configs/DSARP-config.cfg | 0 .../ramulator_configs/GDDR5-config.cfg | 0 .../ramulator_configs/HBM-config.cfg | 0 .../HBM-config_ChRaBaRoCo.cfg | 0 .../ramulator_configs/HBM-config_FCFS.cfg | 0 .../ramulator_configs/HBM-config_FRFCFS.cfg | 0 .../HBM-config_FRFCFS_Cap.cfg | 0 .../HBM-config_FRFCFS_PriorHit.cfg | 0 .../HBM-config_RoBaRaCoCh.cfg | 0 .../HBM-config_RoCoBaRaCh.cfg | 0 .../ramulator_configs/HBMx0.5ch-config.cfg | 0 .../ramulator_configs/HBMx2ch-config.cfg | 0 .../ramulator_configs/LPDDR3-config.cfg | 0 .../ramulator_configs/LPDDR4-config.cfg | 0 .../ramulator_configs/PCM-config.cfg | 0 .../ramulator_configs/SALP-config.cfg | 0 .../ramulator_configs/STTMRAM-config.cfg | 0 .../ramulator_configs/TLDRAM-config.cfg | 0 .../ramulator_configs/WideIO-config.cfg | 0 .../ramulator_configs/WideIO2-config.cfg | 0 .../stonne_big_c1_simple_noc.json | 0 .../stonne_single_c1_simple_noc.json | 0 .../stonne_validation_c1_simple_noc.json | 0 .../systolic_ws_128x128_c1_booksim_tpuv2.json | 0 .../systolic_ws_128x128_c1_booksim_tpuv3.json | 19 +++++++++++++++++++ ...stolic_ws_128x128_c1_simple_noc_tpuv2.json | 0 ...stolic_ws_128x128_c1_simple_noc_tpuv3.json | 0 ...c_ws_128x128_c1_simple_noc_tpuv3_half.json | 0 ...stolic_ws_128x128_c1_simple_noc_tpuv4.json | 0 .../systolic_ws_128x128_c2_booksim_tpuv3.json | 0 ...s_128x128_c2_booksim_tpuv3_bw_quarter.json | 0 .../systolic_ws_128x128_c2_chiplet_tpuv3.json | 0 ...lic_ws_128x128_c2_chiplet_tpuv3_xnuma.json | 0 ...stolic_ws_128x128_c2_simple_noc_tpuv2.json | 0 ...stolic_ws_128x128_c2_simple_noc_tpuv3.json | 0 
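Every configuration file that previously lived under TOGSim/configs/ now resolves from the repository's top-level configs/ folder; the diffstat above shows extension_config.py, extension_op.py, simulator.py, and the README being updated to match. A sketch of the path change for one config, assuming the default TORCHSIM_DIR:

    import os

    torchsim_dir = os.environ.get('TORCHSIM_DIR', '/workspace/PyTorchSim')
    # Before this patch:
    old_path = os.path.join(torchsim_dir, 'TOGSim', 'configs',
                            'systolic_ws_128x128_c1_simple_noc_tpuv3.json')
    # After this patch:
    new_path = os.path.join(torchsim_dir, 'configs',
                            'systolic_ws_128x128_c1_simple_noc_tpuv3.json')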
...128x128_c2_simple_noc_tpuv3_partition.json | 0 ...stolic_ws_128x128_c2_simple_noc_tpuv4.json | 0 .../systolic_ws_8x8_c1_12G_simple_noc.json | 0 .../systolic_ws_8x8_c1_24G_simple_noc.json | 0 .../systolic_ws_8x8_c1_48G_simple_noc.json | 0 .../systolic_ws_8x8_c1_booksim.json | 0 .../systolic_ws_8x8_c1_simple_noc.json | 0 .../systolic_ws_8x8_c2_12G_simple_noc.json | 0 .../systolic_ws_8x8_c2_24G_simple_noc.json | 0 .../systolic_ws_8x8_c2_48G_simple_noc.json | 0 experiments/BERT.py | 2 +- .../artifact/cycle_validation/run_cycle.sh | 2 +- .../speedup/scripts/run_speed_ils_bert.sh | 2 +- .../speedup/scripts/run_speed_ils_conv.sh | 2 +- .../speedup/scripts/run_speed_ils_matmul.sh | 2 +- .../speedup/scripts/run_speed_ils_resnet.sh | 2 +- experiments/attention.py | 2 +- experiments/conv.py | 2 +- experiments/gemm.py | 2 +- experiments/layernorm.py | 2 +- experiments/resnet18.py | 2 +- experiments/resnet50.py | 2 +- experiments/softmax.py | 2 +- scripts/CompilerOpt_experiment/DMAopt.sh | 2 +- scripts/chiplet.sh | 6 +++--- scripts/sparsity_experiment/run.sh | 12 ++++++------ scripts/stonne_experiment2/tog_gen.py | 2 +- tests/test_compile_overhead.py | 2 +- tests/test_hetro.py | 2 +- tests/test_scheduler.py | 2 +- tests/test_scheduler_batching.py | 2 +- tests/test_spmm_scheduler.py | 2 +- tutorial/session1/ExecutionMode.ipynb | 4 ++-- 106 files changed, 109 insertions(+), 38 deletions(-) rename {TOGSim/configs => configs}/booksim2_configs/anynet.icnt (100%) rename {TOGSim/configs => configs}/booksim2_configs/anynet_file (100%) rename {TOGSim/configs => configs}/booksim2_configs/chiplet_32_32_2.icnt (77%) rename {TOGSim/configs => configs}/booksim2_configs/chiplet_32_32_2.net (100%) rename {TOGSim/configs => configs}/booksim2_configs/fly_c16_m16.icnt (100%) rename {TOGSim/configs => configs}/booksim2_configs/fly_c16_m32.icnt (100%) rename {TOGSim/configs => configs}/booksim2_configs/fly_c16_m8.icnt (100%) rename {TOGSim/configs => configs}/booksim2_configs/fly_c1_m1.icnt (100%) create mode 100644 configs/booksim2_configs/fly_c1_m16.icnt rename {TOGSim/configs => configs}/booksim2_configs/fly_c1_m2.icnt (100%) rename {TOGSim/configs => configs}/booksim2_configs/fly_c1_m8.icnt (100%) rename {TOGSim/configs => configs}/booksim2_configs/fly_c2_m32.icnt (100%) rename {TOGSim/configs => configs}/booksim2_configs/fly_c2_m8.icnt (100%) rename {TOGSim/configs => configs}/booksim2_configs/fly_c32_m32.icnt (100%) rename {TOGSim/configs => configs}/booksim2_configs/fly_c32_m4.icnt (100%) rename {TOGSim/configs => configs}/booksim2_configs/fly_c32_m8.icnt (100%) create mode 100644 configs/booksim2_configs/fly_c4_m16.icnt rename {TOGSim/configs => configs}/booksim2_configs/fly_c4_m2.icnt (100%) rename {TOGSim/configs => configs}/booksim2_configs/fly_c4_m32.icnt (100%) rename {TOGSim/configs => configs}/booksim2_configs/fly_c4_m8.icnt (100%) rename {TOGSim/configs => configs}/booksim2_configs/fly_c64_m8.icnt (100%) rename {TOGSim/configs => configs}/booksim2_configs/fly_c64_m8_sif-age.icnt (100%) rename {TOGSim/configs => configs}/booksim2_configs/fly_c64_m8_sif-rr.icnt (100%) create mode 100644 configs/booksim2_configs/fly_c8_m16.icnt rename {TOGSim/configs => configs}/booksim2_configs/make_anynet_topology.py (100%) rename {TOGSim/configs => configs}/booksim2_configs/mesh_sif-age.icnt (100%) rename {TOGSim/configs => configs}/booksim2_configs/mesh_sif-rr.icnt (100%) rename {TOGSim/configs => configs}/heterogeneous_c2_simple_noc.json (100%) rename {TOGSim/configs => configs}/ramulator2_configs/DDR4.yaml (100%) 
rename {TOGSim/configs => configs}/ramulator2_configs/HBM2.yaml (100%) rename {TOGSim/configs => configs}/ramulator2_configs/HBM2_TPUv3.yaml (100%) rename {TOGSim/configs => configs}/ramulator_configs/ALDRAM-config.cfg (100%) rename {TOGSim/configs => configs}/ramulator_configs/DDR3-config.cfg (100%) rename {TOGSim/configs => configs}/ramulator_configs/DDR4-config.cfg (100%) rename {TOGSim/configs => configs}/ramulator_configs/DSARP-config.cfg (100%) rename {TOGSim/configs => configs}/ramulator_configs/GDDR5-config.cfg (100%) rename {TOGSim/configs => configs}/ramulator_configs/HBM-config.cfg (100%) rename {TOGSim/configs => configs}/ramulator_configs/HBM-config_ChRaBaRoCo.cfg (100%) rename {TOGSim/configs => configs}/ramulator_configs/HBM-config_FCFS.cfg (100%) rename {TOGSim/configs => configs}/ramulator_configs/HBM-config_FRFCFS.cfg (100%) rename {TOGSim/configs => configs}/ramulator_configs/HBM-config_FRFCFS_Cap.cfg (100%) rename {TOGSim/configs => configs}/ramulator_configs/HBM-config_FRFCFS_PriorHit.cfg (100%) rename {TOGSim/configs => configs}/ramulator_configs/HBM-config_RoBaRaCoCh.cfg (100%) rename {TOGSim/configs => configs}/ramulator_configs/HBM-config_RoCoBaRaCh.cfg (100%) rename {TOGSim/configs => configs}/ramulator_configs/HBMx0.5ch-config.cfg (100%) rename {TOGSim/configs => configs}/ramulator_configs/HBMx2ch-config.cfg (100%) rename {TOGSim/configs => configs}/ramulator_configs/LPDDR3-config.cfg (100%) rename {TOGSim/configs => configs}/ramulator_configs/LPDDR4-config.cfg (100%) rename {TOGSim/configs => configs}/ramulator_configs/PCM-config.cfg (100%) rename {TOGSim/configs => configs}/ramulator_configs/SALP-config.cfg (100%) rename {TOGSim/configs => configs}/ramulator_configs/STTMRAM-config.cfg (100%) rename {TOGSim/configs => configs}/ramulator_configs/TLDRAM-config.cfg (100%) rename {TOGSim/configs => configs}/ramulator_configs/WideIO-config.cfg (100%) rename {TOGSim/configs => configs}/ramulator_configs/WideIO2-config.cfg (100%) rename {TOGSim/configs => configs}/stonne_big_c1_simple_noc.json (100%) rename {TOGSim/configs => configs}/stonne_single_c1_simple_noc.json (100%) rename {TOGSim/configs => configs}/stonne_validation_c1_simple_noc.json (100%) rename {TOGSim/configs => configs}/systolic_ws_128x128_c1_booksim_tpuv2.json (100%) create mode 100644 configs/systolic_ws_128x128_c1_booksim_tpuv3.json rename {TOGSim/configs => configs}/systolic_ws_128x128_c1_simple_noc_tpuv2.json (100%) rename {TOGSim/configs => configs}/systolic_ws_128x128_c1_simple_noc_tpuv3.json (100%) rename {TOGSim/configs => configs}/systolic_ws_128x128_c1_simple_noc_tpuv3_half.json (100%) rename {TOGSim/configs => configs}/systolic_ws_128x128_c1_simple_noc_tpuv4.json (100%) rename {TOGSim/configs => configs}/systolic_ws_128x128_c2_booksim_tpuv3.json (100%) rename {TOGSim/configs => configs}/systolic_ws_128x128_c2_booksim_tpuv3_bw_quarter.json (100%) rename {TOGSim/configs => configs}/systolic_ws_128x128_c2_chiplet_tpuv3.json (100%) rename {TOGSim/configs => configs}/systolic_ws_128x128_c2_chiplet_tpuv3_xnuma.json (100%) rename {TOGSim/configs => configs}/systolic_ws_128x128_c2_simple_noc_tpuv2.json (100%) rename {TOGSim/configs => configs}/systolic_ws_128x128_c2_simple_noc_tpuv3.json (100%) rename {TOGSim/configs => configs}/systolic_ws_128x128_c2_simple_noc_tpuv3_partition.json (100%) rename {TOGSim/configs => configs}/systolic_ws_128x128_c2_simple_noc_tpuv4.json (100%) rename {TOGSim/configs => configs}/systolic_ws_8x8_c1_12G_simple_noc.json (100%) rename {TOGSim/configs => 
configs}/systolic_ws_8x8_c1_24G_simple_noc.json (100%) rename {TOGSim/configs => configs}/systolic_ws_8x8_c1_48G_simple_noc.json (100%) rename {TOGSim/configs => configs}/systolic_ws_8x8_c1_booksim.json (100%) rename {TOGSim/configs => configs}/systolic_ws_8x8_c1_simple_noc.json (100%) rename {TOGSim/configs => configs}/systolic_ws_8x8_c2_12G_simple_noc.json (100%) rename {TOGSim/configs => configs}/systolic_ws_8x8_c2_24G_simple_noc.json (100%) rename {TOGSim/configs => configs}/systolic_ws_8x8_c2_48G_simple_noc.json (100%) diff --git a/PyTorchSimFrontend/extension_config.py b/PyTorchSimFrontend/extension_config.py index af1e91d3..de8bb6a5 100644 --- a/PyTorchSimFrontend/extension_config.py +++ b/PyTorchSimFrontend/extension_config.py @@ -50,7 +50,7 @@ def __getattr__(name): # TOGSim config if name == "CONFIG_TOGSIM_CONFIG": return os.environ.get('TOGSIM_CONFIG', - default=f"{__getattr__('CONFIG_TORCHSIM_DIR')}/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json") + default=f"{__getattr__('CONFIG_TORCHSIM_DIR')}/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json") if name == "CONFIG_TOGSIM_EAGER_MODE": return int(os.environ.get("TOGSIM_EAGER_MODE", default=False)) if name == "CONFIG_TOGSIM_DEBUG_LEVEL": diff --git a/PyTorchSimFrontend/extension_op.py b/PyTorchSimFrontend/extension_op.py index 517773c1..786e7398 100644 --- a/PyTorchSimFrontend/extension_op.py +++ b/PyTorchSimFrontend/extension_op.py @@ -276,7 +276,7 @@ def sparse_mm_stonne_outer(a, b, out): onnx_path, attribute_path, c_result_path = prepare_outer_product_matrix(a, b, out) togsim_path = os.path.join(extension_config.CONFIG_TORCHSIM_DIR, "TOGSim") - stonne_config_path = f'{extension_config.CONFIG_TORCHSIM_DIR}/TOGSim/configs/stonne_single_c1_simple_noc.json' + stonne_config_path = f'{extension_config.CONFIG_TORCHSIM_DIR}/configs/stonne_single_c1_simple_noc.json' TOGSim = TOGSimulator(togsim_path, stonne_config_path) result_path = TOGSim.simulation(onnx_path) TOGSimulator.get_result_from_file(result_path) diff --git a/README.md b/README.md index 75e08889..43de104b 100644 --- a/README.md +++ b/README.md @@ -131,7 +131,7 @@ Wrapper Codegen Path = /tmp/torchinductor_root/yd/cyda7nhzv5mtakfhfcxtmmhtsv6kg7 [Gem5Simulator] cmd> /workspace/gem5/build/RISCV/gem5.opt -r --stdout-file=sto.log -d /tmp/torchinductor/tmp/fy6nnyudtno/m5out /root/workspace/PyTorchSim/gem5_script/script_systolic.py -c /tmp/torchinductor/tmp/fy6nnyudtno/cycle_bin --vlane 128 [Gem5Simulator] Simulation is still running... 
[SpikeSimulator] cmd> spike --isa rv64gcv --varch=vlen:256,elen:64 --vectorlane-size=128 -m0x80000000:0x1900000000,0x2000000000:0x1000000 --scratchpad-base-paddr=137438953472 --scratchpad-base-vaddr=3489660928 --scratchpad-size=131072 --kernel-addr=0000000000010400:10846 --base-path=/tmp/torchinductor/tmp/fy6nnyudtno/runtime_0001 /workspace/riscv-pk/build/pk /tmp/torchinductor/tmp/fy6nnyudtno/validation_binary /tmp/torchinductor/tmp/fy6nnyudtno/runtime_0001/arg0_1/0.raw /tmp/torchinductor/tmp/fy6nnyudtno/runtime_0001/arg1_1/0.raw /tmp/torchinductor/tmp/fy6nnyudtno/runtime_0001/buf0/0.raw -[TOGSimulator] cmd> /root/workspace/PyTorchSim/TOGSim/build/bin/Simulator --config /root/workspace/PyTorchSim/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json --models_list /tmp/torchinductor/tmp/fy6nnyudtno/tile_graph.onnx --attributes_list /tmp/torchinductor/tmp/fy6nnyudtno/runtime_0001/attribute/0 +[TOGSimulator] cmd> /root/workspace/PyTorchSim/TOGSim/build/bin/Simulator --config /root/workspace/PyTorchSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json --models_list /tmp/torchinductor/tmp/fy6nnyudtno/tile_graph.onnx --attributes_list /tmp/torchinductor/tmp/fy6nnyudtno/runtime_0001/attribute/0 [TOGSimulator] Simulation is still running.. [TOGSimulator] Simulation of "/tmp/torchinductor/tmp/fy6nnyudtno/tile_graph.onnx" is stored to "/tmp/torchinductor/tmp/fy6nnyudtno/togsim_result/0" ---------------------------- @@ -195,7 +195,7 @@ import torch from torchvision.models import resnet18 from test_transformer import EncoderBlock base_path = os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim') -config = f'{base_path}/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv3_partition.json' +config = f'{base_path}/configs/systolic_ws_128x128_c2_simple_noc_tpuv3_partition.json' sys.path.append(base_path) from Scheduler.scheduler import Scheduler, SchedulerDNNModel, Request @@ -375,7 +375,7 @@ export TORCHSIM_USE_TIMING_POOLING=0 # use lightweight pooling for timing ``` You can set TOGSim config path as below. ```bash -export TORCHSIM_CONFIG=/workspace/PyTorchSim/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json +export TORCHSIM_CONFIG=/workspace/PyTorchSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json ``` ## Future Works Currently, PyTorchSim supports PyTorch 2.2. Support for newer versions will be added soon. 
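With configs/ now at the repository top level, the path resolution visible in the extension_config.py and experiments/ hunks follows one rule: an explicit environment variable wins, otherwise a default under $TORCHSIM_DIR/configs is used. A small sketch of that lookup, assuming only what the hunks show; the function name resolve_togsim_config is illustrative, not from the patch:

```python
import os

def resolve_togsim_config() -> str:
    # TORCHSIM_CONFIG, when exported (as in the README hunk above), takes
    # precedence; otherwise fall back to the default TPUv3 config, which now
    # lives under top-level configs/ instead of TOGSim/configs/.
    base_dir = os.environ.get("TORCHSIM_DIR", default="/workspace/PyTorchSim")
    default = f"{base_dir}/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json"
    return os.environ.get("TORCHSIM_CONFIG", default=default)
```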
diff --git a/Simulator/simulator.py b/Simulator/simulator.py index 464c42c1..79fc6858 100644 --- a/Simulator/simulator.py +++ b/Simulator/simulator.py @@ -456,6 +456,6 @@ def get_result_from_file(result_path): return core_metrics, dram_channel_bw, avg_dram_bw, simulation_time, total_cycle if __name__ == "__main__": - sim = TOGSimulator("/workspace/PyTorchSim/TOGSim", "/workspace/PyTorchSim/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv3_partition.json") + sim = TOGSimulator("/workspace/PyTorchSim/TOGSim", "/workspace/PyTorchSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv3_partition.json") sim.interactive_simulation() sim.until(4000) \ No newline at end of file diff --git a/TOGSim/configs/booksim2_configs/anynet.icnt b/configs/booksim2_configs/anynet.icnt similarity index 100% rename from TOGSim/configs/booksim2_configs/anynet.icnt rename to configs/booksim2_configs/anynet.icnt diff --git a/TOGSim/configs/booksim2_configs/anynet_file b/configs/booksim2_configs/anynet_file similarity index 100% rename from TOGSim/configs/booksim2_configs/anynet_file rename to configs/booksim2_configs/anynet_file diff --git a/TOGSim/configs/booksim2_configs/chiplet_32_32_2.icnt b/configs/booksim2_configs/chiplet_32_32_2.icnt similarity index 77% rename from TOGSim/configs/booksim2_configs/chiplet_32_32_2.icnt rename to configs/booksim2_configs/chiplet_32_32_2.icnt index 3102fecc..d677c3ed 100644 --- a/TOGSim/configs/booksim2_configs/chiplet_32_32_2.icnt +++ b/configs/booksim2_configs/chiplet_32_32_2.icnt @@ -2,7 +2,7 @@ use_map = 0 flit_size = 32 topology = anynet -network_file = /workspace/PyTorchSim/TOGSim/configs/booksim2_configs/chiplet_32_32_2.net +network_file = /workspace/PyTorchSim/configs/booksim2_configs/chiplet_32_32_2.net routing_function = min subnets = 1 routing_delay = 4 diff --git a/TOGSim/configs/booksim2_configs/chiplet_32_32_2.net b/configs/booksim2_configs/chiplet_32_32_2.net similarity index 100% rename from TOGSim/configs/booksim2_configs/chiplet_32_32_2.net rename to configs/booksim2_configs/chiplet_32_32_2.net diff --git a/TOGSim/configs/booksim2_configs/fly_c16_m16.icnt b/configs/booksim2_configs/fly_c16_m16.icnt similarity index 100% rename from TOGSim/configs/booksim2_configs/fly_c16_m16.icnt rename to configs/booksim2_configs/fly_c16_m16.icnt diff --git a/TOGSim/configs/booksim2_configs/fly_c16_m32.icnt b/configs/booksim2_configs/fly_c16_m32.icnt similarity index 100% rename from TOGSim/configs/booksim2_configs/fly_c16_m32.icnt rename to configs/booksim2_configs/fly_c16_m32.icnt diff --git a/TOGSim/configs/booksim2_configs/fly_c16_m8.icnt b/configs/booksim2_configs/fly_c16_m8.icnt similarity index 100% rename from TOGSim/configs/booksim2_configs/fly_c16_m8.icnt rename to configs/booksim2_configs/fly_c16_m8.icnt diff --git a/TOGSim/configs/booksim2_configs/fly_c1_m1.icnt b/configs/booksim2_configs/fly_c1_m1.icnt similarity index 100% rename from TOGSim/configs/booksim2_configs/fly_c1_m1.icnt rename to configs/booksim2_configs/fly_c1_m1.icnt diff --git a/configs/booksim2_configs/fly_c1_m16.icnt b/configs/booksim2_configs/fly_c1_m16.icnt new file mode 100644 index 00000000..48df18d1 --- /dev/null +++ b/configs/booksim2_configs/fly_c1_m16.icnt @@ -0,0 +1,18 @@ +[config] +use_map = 0 +flit_size = 32 +topology = fly +k = 17 +n = 1 +routing_function = dest_tag +subnets = 1 + +vc_buf_size = 256 +input_buffer_size = 256 +ejection_buffer_size = 256 +boundary_buffer_size = 256 +wait_for_tail_credit = 0 +vc_allocator = islip +sw_allocator = islip +alloc_iters = 1 
+deadlock_warn_timeout = 10000 \ No newline at end of file diff --git a/TOGSim/configs/booksim2_configs/fly_c1_m2.icnt b/configs/booksim2_configs/fly_c1_m2.icnt similarity index 100% rename from TOGSim/configs/booksim2_configs/fly_c1_m2.icnt rename to configs/booksim2_configs/fly_c1_m2.icnt diff --git a/TOGSim/configs/booksim2_configs/fly_c1_m8.icnt b/configs/booksim2_configs/fly_c1_m8.icnt similarity index 100% rename from TOGSim/configs/booksim2_configs/fly_c1_m8.icnt rename to configs/booksim2_configs/fly_c1_m8.icnt diff --git a/TOGSim/configs/booksim2_configs/fly_c2_m32.icnt b/configs/booksim2_configs/fly_c2_m32.icnt similarity index 100% rename from TOGSim/configs/booksim2_configs/fly_c2_m32.icnt rename to configs/booksim2_configs/fly_c2_m32.icnt diff --git a/TOGSim/configs/booksim2_configs/fly_c2_m8.icnt b/configs/booksim2_configs/fly_c2_m8.icnt similarity index 100% rename from TOGSim/configs/booksim2_configs/fly_c2_m8.icnt rename to configs/booksim2_configs/fly_c2_m8.icnt diff --git a/TOGSim/configs/booksim2_configs/fly_c32_m32.icnt b/configs/booksim2_configs/fly_c32_m32.icnt similarity index 100% rename from TOGSim/configs/booksim2_configs/fly_c32_m32.icnt rename to configs/booksim2_configs/fly_c32_m32.icnt diff --git a/TOGSim/configs/booksim2_configs/fly_c32_m4.icnt b/configs/booksim2_configs/fly_c32_m4.icnt similarity index 100% rename from TOGSim/configs/booksim2_configs/fly_c32_m4.icnt rename to configs/booksim2_configs/fly_c32_m4.icnt diff --git a/TOGSim/configs/booksim2_configs/fly_c32_m8.icnt b/configs/booksim2_configs/fly_c32_m8.icnt similarity index 100% rename from TOGSim/configs/booksim2_configs/fly_c32_m8.icnt rename to configs/booksim2_configs/fly_c32_m8.icnt diff --git a/configs/booksim2_configs/fly_c4_m16.icnt b/configs/booksim2_configs/fly_c4_m16.icnt new file mode 100644 index 00000000..559855a3 --- /dev/null +++ b/configs/booksim2_configs/fly_c4_m16.icnt @@ -0,0 +1,17 @@ +[config] +use_map = 0 +flit_size = 64 +topology = fly +k = 20 +n = 1 +routing_function = dest_tag +subnets = 1 + +vc_buf_size = 64 +input_buffer_size = 256 +ejection_buffer_size = 64 +boundary_buffer_size = 64 +wait_for_tail_credit = 0 +vc_allocator = islip +sw_allocator = islip +alloc_iters = 1 diff --git a/TOGSim/configs/booksim2_configs/fly_c4_m2.icnt b/configs/booksim2_configs/fly_c4_m2.icnt similarity index 100% rename from TOGSim/configs/booksim2_configs/fly_c4_m2.icnt rename to configs/booksim2_configs/fly_c4_m2.icnt diff --git a/TOGSim/configs/booksim2_configs/fly_c4_m32.icnt b/configs/booksim2_configs/fly_c4_m32.icnt similarity index 100% rename from TOGSim/configs/booksim2_configs/fly_c4_m32.icnt rename to configs/booksim2_configs/fly_c4_m32.icnt diff --git a/TOGSim/configs/booksim2_configs/fly_c4_m8.icnt b/configs/booksim2_configs/fly_c4_m8.icnt similarity index 100% rename from TOGSim/configs/booksim2_configs/fly_c4_m8.icnt rename to configs/booksim2_configs/fly_c4_m8.icnt diff --git a/TOGSim/configs/booksim2_configs/fly_c64_m8.icnt b/configs/booksim2_configs/fly_c64_m8.icnt similarity index 100% rename from TOGSim/configs/booksim2_configs/fly_c64_m8.icnt rename to configs/booksim2_configs/fly_c64_m8.icnt diff --git a/TOGSim/configs/booksim2_configs/fly_c64_m8_sif-age.icnt b/configs/booksim2_configs/fly_c64_m8_sif-age.icnt similarity index 100% rename from TOGSim/configs/booksim2_configs/fly_c64_m8_sif-age.icnt rename to configs/booksim2_configs/fly_c64_m8_sif-age.icnt diff --git a/TOGSim/configs/booksim2_configs/fly_c64_m8_sif-rr.icnt 
b/configs/booksim2_configs/fly_c64_m8_sif-rr.icnt similarity index 100% rename from TOGSim/configs/booksim2_configs/fly_c64_m8_sif-rr.icnt rename to configs/booksim2_configs/fly_c64_m8_sif-rr.icnt diff --git a/configs/booksim2_configs/fly_c8_m16.icnt b/configs/booksim2_configs/fly_c8_m16.icnt new file mode 100644 index 00000000..3061d718 --- /dev/null +++ b/configs/booksim2_configs/fly_c8_m16.icnt @@ -0,0 +1,17 @@ +[config] +use_map = 0 +flit_size = 64 +topology = fly +k = 24 +n = 1 +routing_function = dest_tag +subnets = 1 + +vc_buf_size = 64 +input_buffer_size = 256 +ejection_buffer_size = 64 +boundary_buffer_size = 64 +wait_for_tail_credit = 0 +vc_allocator = islip +sw_allocator = islip +alloc_iters = 1 diff --git a/TOGSim/configs/booksim2_configs/make_anynet_topology.py b/configs/booksim2_configs/make_anynet_topology.py similarity index 100% rename from TOGSim/configs/booksim2_configs/make_anynet_topology.py rename to configs/booksim2_configs/make_anynet_topology.py diff --git a/TOGSim/configs/booksim2_configs/mesh_sif-age.icnt b/configs/booksim2_configs/mesh_sif-age.icnt similarity index 100% rename from TOGSim/configs/booksim2_configs/mesh_sif-age.icnt rename to configs/booksim2_configs/mesh_sif-age.icnt diff --git a/TOGSim/configs/booksim2_configs/mesh_sif-rr.icnt b/configs/booksim2_configs/mesh_sif-rr.icnt similarity index 100% rename from TOGSim/configs/booksim2_configs/mesh_sif-rr.icnt rename to configs/booksim2_configs/mesh_sif-rr.icnt diff --git a/TOGSim/configs/heterogeneous_c2_simple_noc.json b/configs/heterogeneous_c2_simple_noc.json similarity index 100% rename from TOGSim/configs/heterogeneous_c2_simple_noc.json rename to configs/heterogeneous_c2_simple_noc.json diff --git a/TOGSim/configs/ramulator2_configs/DDR4.yaml b/configs/ramulator2_configs/DDR4.yaml similarity index 100% rename from TOGSim/configs/ramulator2_configs/DDR4.yaml rename to configs/ramulator2_configs/DDR4.yaml diff --git a/TOGSim/configs/ramulator2_configs/HBM2.yaml b/configs/ramulator2_configs/HBM2.yaml similarity index 100% rename from TOGSim/configs/ramulator2_configs/HBM2.yaml rename to configs/ramulator2_configs/HBM2.yaml diff --git a/TOGSim/configs/ramulator2_configs/HBM2_TPUv3.yaml b/configs/ramulator2_configs/HBM2_TPUv3.yaml similarity index 100% rename from TOGSim/configs/ramulator2_configs/HBM2_TPUv3.yaml rename to configs/ramulator2_configs/HBM2_TPUv3.yaml diff --git a/TOGSim/configs/ramulator_configs/ALDRAM-config.cfg b/configs/ramulator_configs/ALDRAM-config.cfg similarity index 100% rename from TOGSim/configs/ramulator_configs/ALDRAM-config.cfg rename to configs/ramulator_configs/ALDRAM-config.cfg diff --git a/TOGSim/configs/ramulator_configs/DDR3-config.cfg b/configs/ramulator_configs/DDR3-config.cfg similarity index 100% rename from TOGSim/configs/ramulator_configs/DDR3-config.cfg rename to configs/ramulator_configs/DDR3-config.cfg diff --git a/TOGSim/configs/ramulator_configs/DDR4-config.cfg b/configs/ramulator_configs/DDR4-config.cfg similarity index 100% rename from TOGSim/configs/ramulator_configs/DDR4-config.cfg rename to configs/ramulator_configs/DDR4-config.cfg diff --git a/TOGSim/configs/ramulator_configs/DSARP-config.cfg b/configs/ramulator_configs/DSARP-config.cfg similarity index 100% rename from TOGSim/configs/ramulator_configs/DSARP-config.cfg rename to configs/ramulator_configs/DSARP-config.cfg diff --git a/TOGSim/configs/ramulator_configs/GDDR5-config.cfg b/configs/ramulator_configs/GDDR5-config.cfg similarity index 100% rename from 
TOGSim/configs/ramulator_configs/GDDR5-config.cfg rename to configs/ramulator_configs/GDDR5-config.cfg diff --git a/TOGSim/configs/ramulator_configs/HBM-config.cfg b/configs/ramulator_configs/HBM-config.cfg similarity index 100% rename from TOGSim/configs/ramulator_configs/HBM-config.cfg rename to configs/ramulator_configs/HBM-config.cfg diff --git a/TOGSim/configs/ramulator_configs/HBM-config_ChRaBaRoCo.cfg b/configs/ramulator_configs/HBM-config_ChRaBaRoCo.cfg similarity index 100% rename from TOGSim/configs/ramulator_configs/HBM-config_ChRaBaRoCo.cfg rename to configs/ramulator_configs/HBM-config_ChRaBaRoCo.cfg diff --git a/TOGSim/configs/ramulator_configs/HBM-config_FCFS.cfg b/configs/ramulator_configs/HBM-config_FCFS.cfg similarity index 100% rename from TOGSim/configs/ramulator_configs/HBM-config_FCFS.cfg rename to configs/ramulator_configs/HBM-config_FCFS.cfg diff --git a/TOGSim/configs/ramulator_configs/HBM-config_FRFCFS.cfg b/configs/ramulator_configs/HBM-config_FRFCFS.cfg similarity index 100% rename from TOGSim/configs/ramulator_configs/HBM-config_FRFCFS.cfg rename to configs/ramulator_configs/HBM-config_FRFCFS.cfg diff --git a/TOGSim/configs/ramulator_configs/HBM-config_FRFCFS_Cap.cfg b/configs/ramulator_configs/HBM-config_FRFCFS_Cap.cfg similarity index 100% rename from TOGSim/configs/ramulator_configs/HBM-config_FRFCFS_Cap.cfg rename to configs/ramulator_configs/HBM-config_FRFCFS_Cap.cfg diff --git a/TOGSim/configs/ramulator_configs/HBM-config_FRFCFS_PriorHit.cfg b/configs/ramulator_configs/HBM-config_FRFCFS_PriorHit.cfg similarity index 100% rename from TOGSim/configs/ramulator_configs/HBM-config_FRFCFS_PriorHit.cfg rename to configs/ramulator_configs/HBM-config_FRFCFS_PriorHit.cfg diff --git a/TOGSim/configs/ramulator_configs/HBM-config_RoBaRaCoCh.cfg b/configs/ramulator_configs/HBM-config_RoBaRaCoCh.cfg similarity index 100% rename from TOGSim/configs/ramulator_configs/HBM-config_RoBaRaCoCh.cfg rename to configs/ramulator_configs/HBM-config_RoBaRaCoCh.cfg diff --git a/TOGSim/configs/ramulator_configs/HBM-config_RoCoBaRaCh.cfg b/configs/ramulator_configs/HBM-config_RoCoBaRaCh.cfg similarity index 100% rename from TOGSim/configs/ramulator_configs/HBM-config_RoCoBaRaCh.cfg rename to configs/ramulator_configs/HBM-config_RoCoBaRaCh.cfg diff --git a/TOGSim/configs/ramulator_configs/HBMx0.5ch-config.cfg b/configs/ramulator_configs/HBMx0.5ch-config.cfg similarity index 100% rename from TOGSim/configs/ramulator_configs/HBMx0.5ch-config.cfg rename to configs/ramulator_configs/HBMx0.5ch-config.cfg diff --git a/TOGSim/configs/ramulator_configs/HBMx2ch-config.cfg b/configs/ramulator_configs/HBMx2ch-config.cfg similarity index 100% rename from TOGSim/configs/ramulator_configs/HBMx2ch-config.cfg rename to configs/ramulator_configs/HBMx2ch-config.cfg diff --git a/TOGSim/configs/ramulator_configs/LPDDR3-config.cfg b/configs/ramulator_configs/LPDDR3-config.cfg similarity index 100% rename from TOGSim/configs/ramulator_configs/LPDDR3-config.cfg rename to configs/ramulator_configs/LPDDR3-config.cfg diff --git a/TOGSim/configs/ramulator_configs/LPDDR4-config.cfg b/configs/ramulator_configs/LPDDR4-config.cfg similarity index 100% rename from TOGSim/configs/ramulator_configs/LPDDR4-config.cfg rename to configs/ramulator_configs/LPDDR4-config.cfg diff --git a/TOGSim/configs/ramulator_configs/PCM-config.cfg b/configs/ramulator_configs/PCM-config.cfg similarity index 100% rename from TOGSim/configs/ramulator_configs/PCM-config.cfg rename to configs/ramulator_configs/PCM-config.cfg diff --git 
a/TOGSim/configs/ramulator_configs/SALP-config.cfg b/configs/ramulator_configs/SALP-config.cfg similarity index 100% rename from TOGSim/configs/ramulator_configs/SALP-config.cfg rename to configs/ramulator_configs/SALP-config.cfg diff --git a/TOGSim/configs/ramulator_configs/STTMRAM-config.cfg b/configs/ramulator_configs/STTMRAM-config.cfg similarity index 100% rename from TOGSim/configs/ramulator_configs/STTMRAM-config.cfg rename to configs/ramulator_configs/STTMRAM-config.cfg diff --git a/TOGSim/configs/ramulator_configs/TLDRAM-config.cfg b/configs/ramulator_configs/TLDRAM-config.cfg similarity index 100% rename from TOGSim/configs/ramulator_configs/TLDRAM-config.cfg rename to configs/ramulator_configs/TLDRAM-config.cfg diff --git a/TOGSim/configs/ramulator_configs/WideIO-config.cfg b/configs/ramulator_configs/WideIO-config.cfg similarity index 100% rename from TOGSim/configs/ramulator_configs/WideIO-config.cfg rename to configs/ramulator_configs/WideIO-config.cfg diff --git a/TOGSim/configs/ramulator_configs/WideIO2-config.cfg b/configs/ramulator_configs/WideIO2-config.cfg similarity index 100% rename from TOGSim/configs/ramulator_configs/WideIO2-config.cfg rename to configs/ramulator_configs/WideIO2-config.cfg diff --git a/TOGSim/configs/stonne_big_c1_simple_noc.json b/configs/stonne_big_c1_simple_noc.json similarity index 100% rename from TOGSim/configs/stonne_big_c1_simple_noc.json rename to configs/stonne_big_c1_simple_noc.json diff --git a/TOGSim/configs/stonne_single_c1_simple_noc.json b/configs/stonne_single_c1_simple_noc.json similarity index 100% rename from TOGSim/configs/stonne_single_c1_simple_noc.json rename to configs/stonne_single_c1_simple_noc.json diff --git a/TOGSim/configs/stonne_validation_c1_simple_noc.json b/configs/stonne_validation_c1_simple_noc.json similarity index 100% rename from TOGSim/configs/stonne_validation_c1_simple_noc.json rename to configs/stonne_validation_c1_simple_noc.json diff --git a/TOGSim/configs/systolic_ws_128x128_c1_booksim_tpuv2.json b/configs/systolic_ws_128x128_c1_booksim_tpuv2.json similarity index 100% rename from TOGSim/configs/systolic_ws_128x128_c1_booksim_tpuv2.json rename to configs/systolic_ws_128x128_c1_booksim_tpuv2.json diff --git a/configs/systolic_ws_128x128_c1_booksim_tpuv3.json b/configs/systolic_ws_128x128_c1_booksim_tpuv3.json new file mode 100644 index 00000000..d458c90f --- /dev/null +++ b/configs/systolic_ws_128x128_c1_booksim_tpuv3.json @@ -0,0 +1,19 @@ +{ + "num_cores" : 1, + "core_freq_mhz" : 940, + "core_stats_print_period_cycles" : 10000, + "num_systolic_array_per_core" : 2, + + "dram_type" : "ramulator2", + "dram_freq_mhz" : 940, + "dram_channels": 16, + "dram_req_size_byte": 32, + "dram_num_burst_length" : 2, + "dram_stats_print_period_cycles": 10000, + "ramulator_config_path" : "../configs/ramulator2_configs/HBM2_TPUv3.yaml", + + "icnt_type" : "booksim2", + "icnt_freq_mhz" : 940, + "icnt_injection_ports_per_core" : 16, + "booksim_config_path" : "../configs/booksim2_configs/fly_c16_m16.icnt" +} diff --git a/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv2.json b/configs/systolic_ws_128x128_c1_simple_noc_tpuv2.json similarity index 100% rename from TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv2.json rename to configs/systolic_ws_128x128_c1_simple_noc_tpuv2.json diff --git a/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json b/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json similarity index 100% rename from TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json rename to 
configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json diff --git a/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3_half.json b/configs/systolic_ws_128x128_c1_simple_noc_tpuv3_half.json similarity index 100% rename from TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3_half.json rename to configs/systolic_ws_128x128_c1_simple_noc_tpuv3_half.json diff --git a/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv4.json b/configs/systolic_ws_128x128_c1_simple_noc_tpuv4.json similarity index 100% rename from TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv4.json rename to configs/systolic_ws_128x128_c1_simple_noc_tpuv4.json diff --git a/TOGSim/configs/systolic_ws_128x128_c2_booksim_tpuv3.json b/configs/systolic_ws_128x128_c2_booksim_tpuv3.json similarity index 100% rename from TOGSim/configs/systolic_ws_128x128_c2_booksim_tpuv3.json rename to configs/systolic_ws_128x128_c2_booksim_tpuv3.json diff --git a/TOGSim/configs/systolic_ws_128x128_c2_booksim_tpuv3_bw_quarter.json b/configs/systolic_ws_128x128_c2_booksim_tpuv3_bw_quarter.json similarity index 100% rename from TOGSim/configs/systolic_ws_128x128_c2_booksim_tpuv3_bw_quarter.json rename to configs/systolic_ws_128x128_c2_booksim_tpuv3_bw_quarter.json diff --git a/TOGSim/configs/systolic_ws_128x128_c2_chiplet_tpuv3.json b/configs/systolic_ws_128x128_c2_chiplet_tpuv3.json similarity index 100% rename from TOGSim/configs/systolic_ws_128x128_c2_chiplet_tpuv3.json rename to configs/systolic_ws_128x128_c2_chiplet_tpuv3.json diff --git a/TOGSim/configs/systolic_ws_128x128_c2_chiplet_tpuv3_xnuma.json b/configs/systolic_ws_128x128_c2_chiplet_tpuv3_xnuma.json similarity index 100% rename from TOGSim/configs/systolic_ws_128x128_c2_chiplet_tpuv3_xnuma.json rename to configs/systolic_ws_128x128_c2_chiplet_tpuv3_xnuma.json diff --git a/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv2.json b/configs/systolic_ws_128x128_c2_simple_noc_tpuv2.json similarity index 100% rename from TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv2.json rename to configs/systolic_ws_128x128_c2_simple_noc_tpuv2.json diff --git a/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv3.json b/configs/systolic_ws_128x128_c2_simple_noc_tpuv3.json similarity index 100% rename from TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv3.json rename to configs/systolic_ws_128x128_c2_simple_noc_tpuv3.json diff --git a/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv3_partition.json b/configs/systolic_ws_128x128_c2_simple_noc_tpuv3_partition.json similarity index 100% rename from TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv3_partition.json rename to configs/systolic_ws_128x128_c2_simple_noc_tpuv3_partition.json diff --git a/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv4.json b/configs/systolic_ws_128x128_c2_simple_noc_tpuv4.json similarity index 100% rename from TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv4.json rename to configs/systolic_ws_128x128_c2_simple_noc_tpuv4.json diff --git a/TOGSim/configs/systolic_ws_8x8_c1_12G_simple_noc.json b/configs/systolic_ws_8x8_c1_12G_simple_noc.json similarity index 100% rename from TOGSim/configs/systolic_ws_8x8_c1_12G_simple_noc.json rename to configs/systolic_ws_8x8_c1_12G_simple_noc.json diff --git a/TOGSim/configs/systolic_ws_8x8_c1_24G_simple_noc.json b/configs/systolic_ws_8x8_c1_24G_simple_noc.json similarity index 100% rename from TOGSim/configs/systolic_ws_8x8_c1_24G_simple_noc.json rename to configs/systolic_ws_8x8_c1_24G_simple_noc.json diff --git 
a/TOGSim/configs/systolic_ws_8x8_c1_48G_simple_noc.json b/configs/systolic_ws_8x8_c1_48G_simple_noc.json similarity index 100% rename from TOGSim/configs/systolic_ws_8x8_c1_48G_simple_noc.json rename to configs/systolic_ws_8x8_c1_48G_simple_noc.json diff --git a/TOGSim/configs/systolic_ws_8x8_c1_booksim.json b/configs/systolic_ws_8x8_c1_booksim.json similarity index 100% rename from TOGSim/configs/systolic_ws_8x8_c1_booksim.json rename to configs/systolic_ws_8x8_c1_booksim.json diff --git a/TOGSim/configs/systolic_ws_8x8_c1_simple_noc.json b/configs/systolic_ws_8x8_c1_simple_noc.json similarity index 100% rename from TOGSim/configs/systolic_ws_8x8_c1_simple_noc.json rename to configs/systolic_ws_8x8_c1_simple_noc.json diff --git a/TOGSim/configs/systolic_ws_8x8_c2_12G_simple_noc.json b/configs/systolic_ws_8x8_c2_12G_simple_noc.json similarity index 100% rename from TOGSim/configs/systolic_ws_8x8_c2_12G_simple_noc.json rename to configs/systolic_ws_8x8_c2_12G_simple_noc.json diff --git a/TOGSim/configs/systolic_ws_8x8_c2_24G_simple_noc.json b/configs/systolic_ws_8x8_c2_24G_simple_noc.json similarity index 100% rename from TOGSim/configs/systolic_ws_8x8_c2_24G_simple_noc.json rename to configs/systolic_ws_8x8_c2_24G_simple_noc.json diff --git a/TOGSim/configs/systolic_ws_8x8_c2_48G_simple_noc.json b/configs/systolic_ws_8x8_c2_48G_simple_noc.json similarity index 100% rename from TOGSim/configs/systolic_ws_8x8_c2_48G_simple_noc.json rename to configs/systolic_ws_8x8_c2_48G_simple_noc.json diff --git a/experiments/BERT.py b/experiments/BERT.py index c5bb454e..147ce7cf 100644 --- a/experiments/BERT.py +++ b/experiments/BERT.py @@ -36,7 +36,7 @@ def run_BERT(size, input_seq, config): import os import sys base_dir = os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim') - config = os.environ.get('TORCHSIM_CONFIG', default=f'{base_dir}/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json') + config = os.environ.get('TORCHSIM_CONFIG', default=f'{base_dir}/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json') config_prefix = config.split('/')[-1].split('.')[0][9:] # extract config name from config path FIXME: gem5 result is different as directoy name sys.path.append(base_dir) args = argparse.ArgumentParser() diff --git a/experiments/artifact/cycle_validation/run_cycle.sh b/experiments/artifact/cycle_validation/run_cycle.sh index 28e6ad5e..99eed4ed 100755 --- a/experiments/artifact/cycle_validation/run_cycle.sh +++ b/experiments/artifact/cycle_validation/run_cycle.sh @@ -1,7 +1,7 @@ #!/bin/bash set -e -export TORCHSIM_CONFIG=$TORCHSIM_DIR/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json +export TORCHSIM_CONFIG=$TORCHSIM_DIR/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json LOG_DIR=$TORCHSIM_DIR/experiments/artifact/logs mkdir -p $LOG_DIR diff --git a/experiments/artifact/speedup/scripts/run_speed_ils_bert.sh b/experiments/artifact/speedup/scripts/run_speed_ils_bert.sh index 4055b355..fe872e02 100755 --- a/experiments/artifact/speedup/scripts/run_speed_ils_bert.sh +++ b/experiments/artifact/speedup/scripts/run_speed_ils_bert.sh @@ -26,7 +26,7 @@ for i in "${config[@]}"; do echo "===== config=$i | model=$ops =====" >> "$output_file" sum=0.0 count=0 - config_path="$TORCHSIM_DIR/TOGSim/configs/$i" + config_path="$TORCHSIM_DIR/configs/$i" for iter in {1..5}; do echo "[Iter $iter] Running simulation for workload=ils_$ops config=$config" diff --git a/experiments/artifact/speedup/scripts/run_speed_ils_conv.sh b/experiments/artifact/speedup/scripts/run_speed_ils_conv.sh index 
83b3798a..19613a34 100755 --- a/experiments/artifact/speedup/scripts/run_speed_ils_conv.sh +++ b/experiments/artifact/speedup/scripts/run_speed_ils_conv.sh @@ -27,7 +27,7 @@ for i in "${config[@]}"; do echo "===== config=$i | model=$ops =====" >> "$output_file" sum=0.0 count=0 - config_path="$TORCHSIM_DIR/TOGSim/configs/$i" + config_path="$TORCHSIM_DIR/configs/$i" for iter in {1..5}; do echo "[Iter $iter] Running simulation for workload=ils_$ops config=$config" diff --git a/experiments/artifact/speedup/scripts/run_speed_ils_matmul.sh b/experiments/artifact/speedup/scripts/run_speed_ils_matmul.sh index f1467614..6f3385f1 100755 --- a/experiments/artifact/speedup/scripts/run_speed_ils_matmul.sh +++ b/experiments/artifact/speedup/scripts/run_speed_ils_matmul.sh @@ -25,7 +25,7 @@ for i in "${config[@]}"; do echo "===== config=$i | model=$ops =====" >> "$output_file" sum=0.0 count=0 - config_path="$TORCHSIM_DIR/TOGSim/configs/$i" + config_path="$TORCHSIM_DIR/configs/$i" for iter in {1..5}; do echo "[Iter $iter] Running simulation for workload=ils_$ops config=$config" diff --git a/experiments/artifact/speedup/scripts/run_speed_ils_resnet.sh b/experiments/artifact/speedup/scripts/run_speed_ils_resnet.sh index 2ed3ca2a..ca4cfa39 100755 --- a/experiments/artifact/speedup/scripts/run_speed_ils_resnet.sh +++ b/experiments/artifact/speedup/scripts/run_speed_ils_resnet.sh @@ -33,7 +33,7 @@ for i in "${config[@]}"; do echo "===== config=$i | model=$ops =====" >> "$output_file" sum=0.0 count=0 - config_path="$TORCHSIM_DIR/TOGSim/configs/$i" + config_path="$TORCHSIM_DIR/configs/$i" for iter in {1..5}; do echo "[Iter $iter] Running simulation for workload=ils_$ops config=$config" diff --git a/experiments/attention.py b/experiments/attention.py index 5a8c5f45..0dd36210 100644 --- a/experiments/attention.py +++ b/experiments/attention.py @@ -36,7 +36,7 @@ def attention(query, key, value): import os import sys base_dir = os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim') - config = os.environ.get('TORCHSIM_CONFIG', default=f'{base_dir}/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json') + config = os.environ.get('TORCHSIM_CONFIG', default=f'{base_dir}/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json') config_prefix = config.split('/')[-1].split('.')[0][9:] # extract config name from config path sys.path.append(base_dir) args = argparse.ArgumentParser() diff --git a/experiments/conv.py b/experiments/conv.py index c8ca9a37..ecfcabce 100644 --- a/experiments/conv.py +++ b/experiments/conv.py @@ -37,7 +37,7 @@ def custom_conv2d(a, b, bias): import os import sys base_dir = os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim') - config = os.environ.get('TORCHSIM_CONFIG', default=f'{base_dir}/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv4.json') + config = os.environ.get('TORCHSIM_CONFIG', default=f'{base_dir}/configs/systolic_ws_128x128_c2_simple_noc_tpuv4.json') config_prefix = config.split('/')[-1].split('.')[0][9:] # extract config name from config path sys.path.append(base_dir) args = argparse.ArgumentParser() diff --git a/experiments/gemm.py b/experiments/gemm.py index 67dc4f79..02e650bd 100644 --- a/experiments/gemm.py +++ b/experiments/gemm.py @@ -31,7 +31,7 @@ def custom_matmul(a, b): import os import sys base_dir = os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim') - config = os.environ.get('TORCHSIM_CONFIG', default=f'{base_dir}/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv4.json') + config = os.environ.get('TORCHSIM_CONFIG', 
default=f'{base_dir}/configs/systolic_ws_128x128_c2_simple_noc_tpuv4.json') config_prefix = config.split('/')[-1].split('.')[0][9:] # extract config name from config path sys.path.append(base_dir) args = argparse.ArgumentParser() diff --git a/experiments/layernorm.py b/experiments/layernorm.py index 0beaac6c..02a5e0ea 100644 --- a/experiments/layernorm.py +++ b/experiments/layernorm.py @@ -27,7 +27,7 @@ def run_layernorm(size, config): import os import sys base_dir = os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim') - config = os.environ.get('TORCHSIM_CONFIG', default=f'{base_dir}/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv4.json') + config = os.environ.get('TORCHSIM_CONFIG', default=f'{base_dir}/configs/systolic_ws_128x128_c2_simple_noc_tpuv4.json') config_prefix = config.split('/')[-1].split('.')[0][9:] # extract config name from config path sys.path.append(base_dir) args = argparse.ArgumentParser() diff --git a/experiments/resnet18.py b/experiments/resnet18.py index 23d62e40..4f56dab2 100644 --- a/experiments/resnet18.py +++ b/experiments/resnet18.py @@ -29,7 +29,7 @@ def run_resnet(batch, config): import os import sys base_dir = os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim') - config = os.environ.get('TORCHSIM_CONFIG', default=f'{base_dir}/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json') + config = os.environ.get('TORCHSIM_CONFIG', default=f'{base_dir}/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json') config_prefix = config.split('/')[-1].split('.')[0][9:] # extract config name from config path sys.path.append(base_dir) args = argparse.ArgumentParser() diff --git a/experiments/resnet50.py b/experiments/resnet50.py index 60a46071..9d5ba025 100644 --- a/experiments/resnet50.py +++ b/experiments/resnet50.py @@ -29,7 +29,7 @@ def run_resnet(batch, config): import os import sys base_dir = os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim') - config = os.environ.get('TORCHSIM_CONFIG', default=f'{base_dir}/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv4.json') + config = os.environ.get('TORCHSIM_CONFIG', default=f'{base_dir}/configs/systolic_ws_128x128_c2_simple_noc_tpuv4.json') config_prefix = config.split('/')[-1].split('.')[0][9:] # extract config name from config path sys.path.append(base_dir) args = argparse.ArgumentParser() diff --git a/experiments/softmax.py b/experiments/softmax.py index 532ef091..66cd8779 100644 --- a/experiments/softmax.py +++ b/experiments/softmax.py @@ -27,7 +27,7 @@ def run_softmax(size, config, dim=1): import os import sys base_dir = os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim') - config = os.environ.get('TORCHSIM_CONFIG', default=f'{base_dir}/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv4.json') + config = os.environ.get('TORCHSIM_CONFIG', default=f'{base_dir}/configs/systolic_ws_128x128_c2_simple_noc_tpuv4.json') config_prefix = config.split('/')[-1].split('.')[0][9:] # extract config name from config path sys.path.append(base_dir) args = argparse.ArgumentParser() diff --git a/scripts/CompilerOpt_experiment/DMAopt.sh b/scripts/CompilerOpt_experiment/DMAopt.sh index 22118b1e..5c2dc65c 100644 --- a/scripts/CompilerOpt_experiment/DMAopt.sh +++ b/scripts/CompilerOpt_experiment/DMAopt.sh @@ -1,5 +1,5 @@ #!/bin/bash -export TORCHSIM_CONFIG="/root/workspace/PyTorchSim/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv2.json" +export TORCHSIM_CONFIG="/root/workspace/PyTorchSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv2.json" # None FG DMA export 
TORCHSIM_SUBTILE=0 diff --git a/scripts/chiplet.sh b/scripts/chiplet.sh index 2989e4fd..0d56ecae 100755 --- a/scripts/chiplet.sh +++ b/scripts/chiplet.sh @@ -19,11 +19,11 @@ GEMM_DIR_NAME=$(basename "$GEMM_PATH") echo "GEMM Directory Name: $GEMM_DIR_NAME" CONFIG_LIST=( - "$TORCHSIM_DIR/TOGSim/configs/systolic_ws_128x128_c2_chiplet_tpuv3.json" + "$TORCHSIM_DIR/configs/systolic_ws_128x128_c2_chiplet_tpuv3.json" ) CONFIG_LIST2=( - "$TORCHSIM_DIR/TOGSim/configs/systolic_ws_128x128_c2_booksim_tpuv3.json" - "$TORCHSIM_DIR/TOGSim/configs/systolic_ws_128x128_c2_chiplet_tpuv3_xnuma.json" + "$TORCHSIM_DIR/configs/systolic_ws_128x128_c2_booksim_tpuv3.json" + "$TORCHSIM_DIR/configs/systolic_ws_128x128_c2_chiplet_tpuv3_xnuma.json" ) shift shift diff --git a/scripts/sparsity_experiment/run.sh b/scripts/sparsity_experiment/run.sh index 94e00527..4f5dd3a6 100755 --- a/scripts/sparsity_experiment/run.sh +++ b/scripts/sparsity_experiment/run.sh @@ -5,7 +5,7 @@ export TORCHSIM_FORCE_TIME_M=8 export TORCHSIM_FORCE_TIME_N=8 OUTPUT_DIR="12GB" -export TORCHSIM_CONFIG="/workspace/PyTorchSim/TOGSim/configs/systolic_ws_8x8_c1_12G_simple_noc.json" +export TORCHSIM_CONFIG="/workspace/PyTorchSim/configs/systolic_ws_8x8_c1_12G_simple_noc.json" python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.0 > ${OUTPUT_DIR}/0.0 python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.2 > ${OUTPUT_DIR}/0.2 python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.4 > ${OUTPUT_DIR}/0.4 @@ -13,7 +13,7 @@ python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.6 > ${OUTPUT python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.8 > ${OUTPUT_DIR}/0.8 OUTPUT_DIR="24GB" -export TORCHSIM_CONFIG="/workspace/PyTorchSim/TOGSim/configs/systolic_ws_8x8_c1_24G_simple_noc.json" +export TORCHSIM_CONFIG="/workspace/PyTorchSim/configs/systolic_ws_8x8_c1_24G_simple_noc.json" python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.0 > ${OUTPUT_DIR}/0.0 python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.2 > ${OUTPUT_DIR}/0.2 python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.4 > ${OUTPUT_DIR}/0.4 @@ -21,7 +21,7 @@ python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.6 > ${OUTPUT python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.8 > ${OUTPUT_DIR}/0.8 OUTPUT_DIR="48GB" -export TORCHSIM_CONFIG="/workspace/PyTorchSim/TOGSim/configs/systolic_ws_8x8_c1_48G_simple_noc.json" +export TORCHSIM_CONFIG="/workspace/PyTorchSim/configs/systolic_ws_8x8_c1_48G_simple_noc.json" python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.0 > ${OUTPUT_DIR}/0.0 python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.2 > ${OUTPUT_DIR}/0.2 python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.4 > ${OUTPUT_DIR}/0.4 @@ -29,7 +29,7 @@ python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.6 > ${OUTPUT python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.8 > ${OUTPUT_DIR}/0.8 OUTPUT_DIR="12GB_2core" -export TORCHSIM_CONFIG="/workspace/PyTorchSim/TOGSim/configs/systolic_ws_8x8_c2_12G_simple_noc.json" +export TORCHSIM_CONFIG="/workspace/PyTorchSim/configs/systolic_ws_8x8_c2_12G_simple_noc.json" python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.0 > ${OUTPUT_DIR}/0.0 python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.2 > ${OUTPUT_DIR}/0.2 python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.4 > ${OUTPUT_DIR}/0.4 @@ -37,7 +37,7 @@ python3 
/workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.6 > ${OUTPUT python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.8 > ${OUTPUT_DIR}/0.8 OUTPUT_DIR="24GB_2core" -export TORCHSIM_CONFIG="/workspace/PyTorchSim/TOGSim/configs/systolic_ws_8x8_c2_24G_simple_noc.json" +export TORCHSIM_CONFIG="/workspace/PyTorchSim/configs/systolic_ws_8x8_c2_24G_simple_noc.json" python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.0 > ${OUTPUT_DIR}/0.0 python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.2 > ${OUTPUT_DIR}/0.2 python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.4 > ${OUTPUT_DIR}/0.4 @@ -45,7 +45,7 @@ python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.6 > ${OUTPUT python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.8 > ${OUTPUT_DIR}/0.8 OUTPUT_DIR="48GB_2core" -export TORCHSIM_CONFIG="/workspace/PyTorchSim/TOGSim/configs/systolic_ws_8x8_c2_48G_simple_noc.json" +export TORCHSIM_CONFIG="/workspace/PyTorchSim/configs/systolic_ws_8x8_c2_48G_simple_noc.json" python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.0 > ${OUTPUT_DIR}/0.0 python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.2 > ${OUTPUT_DIR}/0.2 python3 /workspace/PyTorchSim/tests/test_sparsity.py --sparsity 0.4 > ${OUTPUT_DIR}/0.4 diff --git a/scripts/stonne_experiment2/tog_gen.py b/scripts/stonne_experiment2/tog_gen.py index be30795b..d4f93d4d 100644 --- a/scripts/stonne_experiment2/tog_gen.py +++ b/scripts/stonne_experiment2/tog_gen.py @@ -72,7 +72,7 @@ def extract_simulation_stats(result_path): continue tog_path = os.path.join(path, "tile_graph.onnx") togsim_path = os.path.join(extension_config.CONFIG_TORCHSIM_DIR, "TOGSim") - stonne_config_path = f'{extension_config.CONFIG_TORCHSIM_DIR}/TOGSim/configs/stonne_validation_c1_simple_noc.json' + stonne_config_path = f'{extension_config.CONFIG_TORCHSIM_DIR}/configs/stonne_validation_c1_simple_noc.json' backsim = TOGSimulator(togsim_path, stonne_config_path) result_path = backsim.simulation(tog_path) nr_multiplications, total_cycle, sim_time = extract_simulation_stats(result_path) diff --git a/tests/test_compile_overhead.py b/tests/test_compile_overhead.py index c32b4364..030f548e 100644 --- a/tests/test_compile_overhead.py +++ b/tests/test_compile_overhead.py @@ -21,7 +21,7 @@ # shutil.rmtree("/tmp/torchinductor") #except FileNotFoundError: # print("no cache") - scheduler = Scheduler(num_request_queue=1, max_batch=4, engine_select=Scheduler.FIFO_ENGINE, togsim_config=f"{CONFIG_TORCHSIM_DIR}/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv2.json") + scheduler = Scheduler(num_request_queue=1, max_batch=4, engine_select=Scheduler.FIFO_ENGINE, togsim_config=f"{CONFIG_TORCHSIM_DIR}/configs/systolic_ws_128x128_c2_simple_noc_tpuv2.json") # Register compiled model opt_model1 = torch.compile(target_model1.to(device=scheduler.execution_engine.module.custom_device(), memory_format=torch.channels_last), dynamic=False) SchedulerDNNModel.register_model("resnet18", opt_model1) diff --git a/tests/test_hetro.py b/tests/test_hetro.py index 557ea5d6..a0716e2d 100644 --- a/tests/test_hetro.py +++ b/tests/test_hetro.py @@ -26,7 +26,7 @@ def custom_matmul(a, b): K = args.K sparsity = args.sparsity mode = args.mode - config_path = f"{CONFIG_TORCHSIM_DIR}/TOGSim/configs/{args.config}" + config_path = f"{CONFIG_TORCHSIM_DIR}/configs/{args.config}" print("M: ", M) print("N: ", N) diff --git a/tests/test_scheduler.py b/tests/test_scheduler.py index 91bf0ad8..4860de56 100644 --- 
a/tests/test_scheduler.py +++ b/tests/test_scheduler.py @@ -7,7 +7,7 @@ base_path = os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim') sys.path.append(base_path) from Scheduler.scheduler import Scheduler, SchedulerDNNModel, Request -config = f'{base_path}/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv3_partition.json' +config = f'{base_path}/configs/systolic_ws_128x128_c2_simple_noc_tpuv3_partition.json' target_model1 = model1().eval() target_model2 = model2(768, 12).eval() diff --git a/tests/test_scheduler_batching.py b/tests/test_scheduler_batching.py index 5a34d161..53f9256d 100644 --- a/tests/test_scheduler_batching.py +++ b/tests/test_scheduler_batching.py @@ -17,7 +17,7 @@ target_model1 = model1().eval() # Init scheduler - scheduler = Scheduler(num_request_queue=1, max_batch=32, engine_select=Scheduler.FIFO_ENGINE, togsim_config=f"{CONFIG_TORCHSIM_DIR}/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv2.json") + scheduler = Scheduler(num_request_queue=1, max_batch=32, engine_select=Scheduler.FIFO_ENGINE, togsim_config=f"{CONFIG_TORCHSIM_DIR}/configs/systolic_ws_128x128_c2_simple_noc_tpuv2.json") # Register compiled model opt_model1 = torch.compile(target_model1.to(device=scheduler.execution_engine.module.custom_device(), memory_format=torch.channels_last), dynamic=False) SchedulerDNNModel.register_model("resnet18", opt_model1) diff --git a/tests/test_spmm_scheduler.py b/tests/test_spmm_scheduler.py index c7abf0ae..71594eb2 100644 --- a/tests/test_spmm_scheduler.py +++ b/tests/test_spmm_scheduler.py @@ -25,7 +25,7 @@ output_size = args.output_size w1_sparsity = args.w1_sparsity w2_sparsity = args.w2_sparsity - config_path = f"{CONFIG_TORCHSIM_DIR}/TOGSim/configs/{args.config}" + config_path = f"{CONFIG_TORCHSIM_DIR}/configs/{args.config}" print("batch_size: ", batch_size) print("input_size: ", input_size) diff --git a/tutorial/session1/ExecutionMode.ipynb b/tutorial/session1/ExecutionMode.ipynb index d1b1aadf..a18ed90e 100644 --- a/tutorial/session1/ExecutionMode.ipynb +++ b/tutorial/session1/ExecutionMode.ipynb @@ -106,7 +106,7 @@ "metadata": {}, "outputs": [], "source": [ - "os.environ['TOGSIM_CONFIG']=f\"{base_dir}/TOGSim/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json\"\n", + "os.environ['TOGSIM_CONFIG']=f\"{base_dir}/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json\"\n", "\n", "input = torch.randn(2048, 2048).to(device=device)\n", "weight = torch.randn(2048, 2048).to(device=device)\n", @@ -137,7 +137,7 @@ "metadata": {}, "outputs": [], "source": [ - "os.environ['TOGSIM_CONFIG']=f\"{base_dir}/TOGSim/configs/systolic_ws_128x128_c2_simple_noc_tpuv3.json\"\n", + "os.environ['TOGSIM_CONFIG']=f\"{base_dir}/configs/systolic_ws_128x128_c2_simple_noc_tpuv3.json\"\n", "\n", "input = torch.randn(2048, 2048).to(device=device)\n", "weight = torch.randn(2048, 2048).to(device=device)\n", From 9db0f2cee97a276428d8cbe9500fb93879741dab Mon Sep 17 00:00:00 2001 From: Wonhyuk Yang Date: Tue, 2 Dec 2025 21:16:13 +0000 Subject: [PATCH 12/21] [Refactor] Integrate env & configs for a unified configuration file --- .github/workflows/pytorchsim_test.yml | 144 +++++++++--------- PyTorchSimFrontend/extension_codecache.py | 17 +-- PyTorchSimFrontend/extension_config.py | 135 +++++++--------- .../mlir/mlir_codegen_backend.py | 6 +- PyTorchSimFrontend/mlir/mlir_common.py | 6 +- PyTorchSimFrontend/mlir/mlir_conv_common.py | 2 +- PyTorchSimFrontend/mlir/mlir_lowering.py | 2 +- PyTorchSimFrontend/mlir/mlir_scheduling.py | 2 +- PyTorchSimFrontend/mlir/mlir_template.py | 2 +-
README.md | 8 +- TOGSim/src/Simulator.cc | 2 +- .../systolic_ws_128x128_c1_booksim_tpuv2.json | 12 +- .../systolic_ws_128x128_c1_booksim_tpuv3.json | 15 +- ...stolic_ws_128x128_c1_simple_noc_tpuv2.json | 15 +- ...stolic_ws_128x128_c1_simple_noc_tpuv3.json | 15 +- ...c_ws_128x128_c1_simple_noc_tpuv3_half.json | 15 +- ...stolic_ws_128x128_c1_simple_noc_tpuv4.json | 15 +- .../systolic_ws_128x128_c2_booksim_tpuv3.json | 15 +- ...s_128x128_c2_booksim_tpuv3_bw_quarter.json | 15 +- .../systolic_ws_128x128_c2_chiplet_tpuv3.json | 15 +- ...lic_ws_128x128_c2_chiplet_tpuv3_xnuma.json | 15 +- ...stolic_ws_128x128_c2_simple_noc_tpuv2.json | 15 +- ...stolic_ws_128x128_c2_simple_noc_tpuv3.json | 15 +- ...128x128_c2_simple_noc_tpuv3_partition.json | 15 +- ...stolic_ws_128x128_c2_simple_noc_tpuv4.json | 15 +- experiments/BERT.py | 4 +- experiments/artifact/speedup/run_speedup.sh | 2 +- experiments/attention.py | 4 +- experiments/conv.py | 4 +- experiments/gemm.py | 4 +- experiments/layernorm.py | 4 +- experiments/resnet18.py | 4 +- experiments/resnet50.py | 4 +- experiments/softmax.py | 4 +- tutorial/session1/CompilerOptimization.ipynb | 2 +- tutorial/session1/ExecutionMode.ipynb | 12 +- tutorial/session1/LogAnalysis.ipynb | 4 +- 37 files changed, 370 insertions(+), 215 deletions(-) diff --git a/.github/workflows/pytorchsim_test.yml b/.github/workflows/pytorchsim_test.yml index 32d6543c..fe8a4a7d 100644 --- a/.github/workflows/pytorchsim_test.yml +++ b/.github/workflows/pytorchsim_test.yml @@ -33,8 +33,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/test_add.py test_transcendental: @@ -54,8 +54,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/test_transcendental.py test_activation: @@ -75,8 +75,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/test_activation.py test_batchnorm: @@ -96,8 +96,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/test_batchnorm.py test_bmm: @@ -117,8 +117,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/test_bmm.py 
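The workflow changes in this patch replace frontend-specific variables (TORCHSIM_VECTOR_LANE, TORCHSIM_SPAD_SIZE) with names that match configuration keys (vpu_num_lanes, vpu_spad_size_kb_per_lane), which is what "Integrate env & configs for a unified configuration file" amounts to in CI: one name works in both the JSON file and the environment. One plausible override layer under that scheme — the loader below is a sketch under that assumption, not code from this patch:

```python
import json
import os

def load_unified_config(path: str) -> dict:
    """Load a TOGSim JSON config; same-named env vars override its keys."""
    with open(path) as f:
        config = json.load(f)
    for key, value in config.items():
        if key not in os.environ:
            continue
        raw = os.environ[key]
        # Keep the original JSON type: strings pass through unchanged,
        # numbers and booleans are parsed with JSON syntax ("128", "true").
        config[key] = raw if isinstance(value, str) else json.loads(raw)
    return config

# Under this scheme, exporting vpu_num_lanes=128 overrides the
# "vpu_num_lanes" entry of the JSON file, matching the CI exports above.
```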
test_cnn: @@ -138,8 +138,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/test_cnn.py test_conv2d: @@ -159,8 +159,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/test_conv2d.py test_matmul: @@ -180,8 +180,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/test_matmul.py test_reduce: @@ -201,8 +201,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/test_reduce.py test_softmax: @@ -222,8 +222,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/test_softmax.py test_transpose2D: @@ -243,8 +243,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/test_transpose2D.py test_view3D_2D: @@ -264,8 +264,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/test_view3D_2D.py test_layernorm: @@ -285,8 +285,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/test_layernorm.py test_mlp: @@ -306,8 +306,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e 
vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/test_mlp.py test_resnet: @@ -327,8 +327,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/test_resnet.py - name: Run test_resnet50.py @@ -337,8 +337,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/test_resnet.py --model_type resnet50 test_transformer: @@ -358,8 +358,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/test_transformer.py test_transpose3D: @@ -379,8 +379,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/test_transpose3D.py test_sparsity: @@ -400,8 +400,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/test_sparsity.py test_pool: @@ -421,8 +421,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/test_pool.py test_perceptron: @@ -442,8 +442,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/test_single_perceptron.py test_fusion: @@ -463,8 +463,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/Fusion/test_addmm_residual.py - name: Run 
test_matmul_activation.py @@ -473,8 +473,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/Fusion/test_matmul_activation.py - name: Run test_matmul_scalar.py @@ -483,8 +483,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/Fusion/test_matmul_scalar.py - name: Run test_matmul_reduction.py @@ -493,8 +493,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/Fusion/test_matmul_reduction.py - name: Run test_bmm_reduction.py @@ -503,8 +503,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/Fusion/test_bmm_reduction.py - name: Run test_prologue_fusion.py @@ -513,8 +513,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/Fusion/test_prologue_fusion.py - name: Run test_transformer_fusion.py @@ -523,8 +523,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/Fusion/test_transformer_fusion.py - name: Run test_conv_fusion.py @@ -533,8 +533,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/Fusion/test_conv_fusion.py test_moe: @@ -554,8 +554,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/MoE/test_moe.py test_mistral: @@ 
-575,8 +575,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/Mixtral_8x7B/test_attention.py test_vit: @@ -596,8 +596,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/test_vit.py test_diffusion: @@ -617,8 +617,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/Diffusion/test_diffusion.py test_indirect: @@ -638,8 +638,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/test_indirect_access.py test_scheduler: @@ -659,8 +659,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} python3 PyTorchSim/tests/test_scheduler.py test_accuracy: @@ -683,8 +683,8 @@ jobs: docker run --rm \ -v /tmp/torchsim-ci/${GITHUB_SHA}:/dump \ -e TORCHSIM_DUMP_PATH=/dump \ - -e TORCHSIM_VECTOR_LANE="${{ inputs.vector_lane }}" \ - -e TORCHSIM_SPAD_SIZE="${{ inputs.spad_size }}" \ + -e vpu_num_lanes="${{ inputs.vector_lane }}" \ + -e vpu_spad_size_kb_per_lane="${{ inputs.spad_size }}" \ ${{ inputs.image_name }} bash -c \ "cd /workspace && PyTorchSim/experiments/artifact/cycle_validation/run_cycle.sh && \ cp PyTorchSim/experiments/artifact/cycle_validation/summary_cycle.out /dump/summary_cycle.out" diff --git a/PyTorchSimFrontend/extension_codecache.py b/PyTorchSimFrontend/extension_codecache.py index 8c60df67..65e575d7 100644 --- a/PyTorchSimFrontend/extension_codecache.py +++ b/PyTorchSimFrontend/extension_codecache.py @@ -152,7 +152,7 @@ def load(cls, source_code, else: link_option = "" # Generate LLVM kernel calller and binary for validation - if extension_config.CONFIG_TORCHSIM_FUNCTIONAL_MODE: + if extension_config.pytorchsim_functional_mode: # Use custom malloc to avoid size error new_link_option = link_option + " -Wl,--wrap=malloc -Wl,--wrap=free" cmds = mlir_compile_command(new_input_path, vectorlane_size, vlen=vlen) @@ -169,7 +169,7 @@ def load(cls, source_code, print("Error output:", e.output) assert(0) - val_llvm_caller = MLIRKernelCallerCodeGen(extension_config.CONFIG_TORCHSIM_FUNCTIONAL_MODE, arg_attributes) + val_llvm_caller = 
MLIRKernelCallerCodeGen(extension_config.pytorchsim_functional_mode, arg_attributes) val_llvm_caller.generate_wrapper_file(write_path, validation_wrapper_name) val_llvm_caller.compile_wih_kernel(write_path, key, validation_wrapper_name, validation_binary_name, new_link_option) @@ -200,7 +200,7 @@ def load(cls, source_code, print("Error output:", e.output) assert(0) - if not extension_config.CONFIG_TORCHSIM_TIMING_MODE: + if not extension_config.pytorchsim_timing_mode: return key # Generate MLIR kernel calller and binary for cycle calculation @@ -271,13 +271,13 @@ def dummy_simulator(*args, **kwargs): # Dump arguments and meta data dump_metadata(args, arg_attributes, result_path) runtime_path = FunctionalSimulator.get_runtime_dump_path(result_path) - if not autotune and (extension_config.CONFIG_TORCHSIM_FUNCTIONAL_MODE or validate): + if not autotune and (extension_config.pytorchsim_functional_mode or validate): funcsim = FunctionalSimulator(result_path, key) funcsim.run_spike(args, arg_attributes, runtime_path, self.validation_binary_name, vectorlane_size=vectorlane_size, spad_info=spad_info, - cleanup=extension_config.CONFIG_CLEANUP_DUMP_ARGS, silent_mode=silent_mode) - if not extension_config.CONFIG_TORCHSIM_TIMING_MODE: + silent_mode=silent_mode) + if not extension_config.pytorchsim_timing_mode: return onnx_path = os.path.join(result_path, "tile_graph.onnx") @@ -303,12 +303,11 @@ def dryrun_simulator(*args, **kwargs): runtime_path = FunctionalSimulator.get_runtime_dump_path(result_path) # Todo. Support valude dependent mode for graph mode - if False: # extension_config.CONFIG_TORCHSIM_FUNCTIONAL_MODE: + if False: # extension_config.pytorchsim_functional_mode: funcsim = FunctionalSimulator(result_path, key) funcsim.run_spike(args, arg_attributes, runtime_path, self.validation_binary_name, - vectorlane_size=vectorlane_size, spad_info=spad_info, - cleanup=extension_config.CONFIG_CLEANUP_DUMP_ARGS) + vectorlane_size=vectorlane_size, spad_info=spad_info) return result_path, runtime_path, None is_dryrun = int(os.environ.get('TOGSIM_EAGER_MODE', default=False)) and not autotune diff --git a/PyTorchSimFrontend/extension_config.py b/PyTorchSimFrontend/extension_config.py index de8bb6a5..b86c3164 100644 --- a/PyTorchSimFrontend/extension_config.py +++ b/PyTorchSimFrontend/extension_config.py @@ -3,112 +3,89 @@ import tempfile import importlib import datetime +import json + +CONFIG_TORCHSIM_DIR = os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim') +CONFIG_GEM5_PATH = os.environ.get('GEM5_PATH', default="/workspace/gem5/build/RISCV/gem5.opt") +CONFIG_TORCHSIM_LLVM_PATH = os.environ.get('TORCHSIM_LLVM_PATH', default="/usr/bin") + +CONFIG_TORCHSIM_DUMP_MLIR_IR = int(os.environ.get("TORCHSIM_DUMP_MLIR_IR", default=False)) +CONFIG_TORCHSIM_DUMP_LLVM_IR = int(os.environ.get("TORCHSIM_DUMP_LLVM_IR", default=False)) def __getattr__(name): + # TOGSim config + config_path = os.environ.get('TOGSIM_CONFIG', + default=f"{CONFIG_TORCHSIM_DIR}/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json") + if name == "CONFIG_TOGSIM_CONFIG": + return config_path + config_json = json.load(open(config_path, 'r')) # Hardware info config - if name == "CONFIG_VECTOR_LANE": - return int(os.environ.get("TORCHSIM_VECTOR_LANE", default=128)) - if name == "CONFIG_VECTOR_LANE_STRIDE": - return int(os.environ.get("TORCHSIM_VECTOR_LANE_STRIDE", default=2)) + if name == "vpu_num_lanes": + return config_json["vpu_num_lanes"] if name == "CONFIG_SPAD_INFO": return { "spad_vaddr" : 0xD0000000, "spad_paddr" : 0x2000000000, - 
"spad_size" : int(os.environ.get("TORCHSIM_SPAD_SIZE", default=128)) << 10 # Note: spad size per lane + "spad_size" : config_json["vpu_spad_size_kb_per_lane"] << 10 # Note: spad size per lane } + if name == "CONFIG_PRECISION": return 4 # 32bit if name == "CONFIG_NUM_CORES": - return 1 - if name == "CONFIG_VLEN": - return 256 # 256bits / 32bits = 8 [elements] + return config_json["num_cores"] + if name == "vpu_vector_length_bits": + return config_json["vpu_vector_length_bits"] - # Tile size config - if name == "CONFIG_TORCHSIM_DIR": - return os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim') + if name == "pytorchsim_functional_mode": + return config_json['pytorchsim_functional_mode'] + if name == "pytorchsim_timing_mode": + return config_json['pytorchsim_timing_mode'] - if name == "CONFIG_TORCHSIM_DUMP_PATH": - return os.environ.get('TORCHSIM_DUMP_PATH', default = __getattr__('CONFIG_TORCHSIM_DIR')) - if name == "CONFIG_TORCHSIM_LOG_PATH": - return os.environ.get('TORCHSIM_DUMP_LOG_PATH', default = os.path.join(__getattr__("CONFIG_TORCHSIM_DIR"), "outputs", datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))) - if name == "CONFIG_TORCHSIM_FUNCTIONAL_MODE": - return int(os.environ.get('TORCHSIM_FUNCTIONAL_MODE', default=True)) - if name == "CONFIG_TORCHSIM_TIMING_MODE": - return int(os.environ.get("TORCHSIM_TIMING_MODE", True)) - if name == "CONFIG_CLEANUP_DUMP_ARGS": - return int(os.environ.get('CLEANUP_DUMP_ARGS', default=False)) - - # LLVM PATH - if name == "CONFIG_TORCHSIM_LLVM_PATH": - return os.environ.get('TORCHSIM_LLVM_PATH', default="/usr/bin") - if name == "CONFIG_TORCHSIM_DUMP_MLIR_IR": - return int(os.environ.get("TORCHSIM_DUMP_MLIR_IR", default=False)) - if name == "CONFIG_TORCHSIM_DUMP_LLVM_IR": - return int(os.environ.get("TORCHSIM_DUMP_LLVM_IR", default=False)) + # Mapping strategy + if name == "codegen_mapping_strategy": + codegen_mapping_strategy = config_json["codegen_mapping_strategy"] + assert(codegen_mapping_strategy in ["heuristic", "autotune", "external-then-heuristic", "external-then-autotune"]), "Invalid mapping strategy!" 
+ return codegen_mapping_strategy - # TOGSim config - if name == "CONFIG_TOGSIM_CONFIG": - return os.environ.get('TOGSIM_CONFIG', - default=f"{__getattr__('CONFIG_TORCHSIM_DIR')}/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json") - if name == "CONFIG_TOGSIM_EAGER_MODE": - return int(os.environ.get("TOGSIM_EAGER_MODE", default=False)) - if name == "CONFIG_TOGSIM_DEBUG_LEVEL": - return os.environ.get("TOGSIM_DEBUG_LEVEL", "") - - # GEM5 config - if name == "CONFIG_GEM5_PATH": - return os.environ.get('GEM5_PATH', default="/workspace/gem5/build/RISCV/gem5.opt") - - # Mapping Policy - if name == "CONFIG_MAPPING_POLICY": - return os.environ.get('TORCHSIM_MAPPING_POLICY', default="heuristic") # heuristic, manual, autotune - - # Manual Tile Size - if name == "CONFIG_TILE_M": - return int(os.getenv("TORCHSIM_TILE_M", __getattr__("CONFIG_VECTOR_LANE"))) - if name == "CONFIG_TILE_N": - return int(os.getenv("TORCHSIM_TILE_N", __getattr__("CONFIG_VECTOR_LANE"))) - if name == "CONFIG_TILE_K": - return int(os.getenv("TORCHSIM_TILE_K", __getattr__("CONFIG_VECTOR_LANE"))) - - if name == "CONFIG_MANUAL_SUBTILE_SIZE": - return int(os.environ.get('TORCHSIM_MANUAL_SUBTILE_SIZE', default=False)) - if name == "CONFIG_SUBTILE_M": - return int(os.environ.get('TORCHSIM_SUBTILE_M', default=__getattr__("CONFIG_VECTOR_LANE"))) - if name == "CONFIG_SUBTILE_N": - return int(os.environ.get('TORCHSIM_SUBTILE_N', default=__getattr__("CONFIG_VECTOR_LANE"))) - if name == "CONFIG_SUBTILE_K": - return int(os.environ.get('TORCHSIM_SUBTILE_K', default=__getattr__("CONFIG_VECTOR_LANE"))) + if name == "codegen_external_mapping_file": + return config_json["codegen_external_mapping_file"] # Autotune config - if name == "CONFIG_MAX_AUTOTUNE_TRY": - return int(os.environ.get('MAX_AUTOTUNE_TRY', default=10)) - if name == "CONFIG_AUTOTUNE_TEMPLATE_TOPK": - return int(os.environ.get('AUTOTUNE_TEMPLATE_TOPK', default=4)) - - if name == "CONFIG_GEMM_CHEATSHEET_PATH": - return os.environ.get('TORCHSIM_GEMM_CHEATSHEET_PATH', - default=f"{__getattr__('CONFIG_TORCHSIM_DIR')}/validation/gemm_tpuv3_cheatsheet.json") + if name == "codegen_autotune_max_retry": + return config_json["codegen_autotune_max_retry"] + if name == "codegen_autotune_template_topk": + return config_json["codegen_autotune_template_topk"] + # Compiler Optimization - if name == "CONFIG_COMPILER_OPTIMIZATION": - return os.environ.get('TORCHSIM_COMPILER_OPTIMIZATION', default="all") # options: all, none, custom + if name == "codegen_compiler_optimization": + return config_json["codegen_compiler_optimization"] # Advanced fusion options if name == "CONFIG_FUSION": - return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "fusion" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False + return True if (__getattr__("codegen_compiler_optimization") == "all" or "fusion" in __getattr__("codegen_compiler_optimization")) else False if name == "CONFIG_FUSION_REDUCTION_EPILOGUE": - return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "reduction_epliogue" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False + return True if (__getattr__("codegen_compiler_optimization") == "all" or "reduction_epliogue" in __getattr__("codegen_compiler_optimization")) else False if name == "CONFIG_FUSION_REDUCTION_REDUCTION": - return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "reduction_reduction" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False + return True if (__getattr__("codegen_compiler_optimization") == "all" or 
"reduction_reduction" in __getattr__("codegen_compiler_optimization")) else False if name == "CONFIG_FUSION_PROLOGUE": - return True if ((__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all") or ("prologue" in __getattr__("CONFIG_COMPILER_OPTIMIZATION"))) else False + return True if ((__getattr__("codegen_compiler_optimization") == "all") or ("prologue" in __getattr__("codegen_compiler_optimization"))) else False if name == "CONFIG_SINGLE_BATCH_CONV": - return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "single_batch_conv" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False + return True if (__getattr__("codegen_compiler_optimization") == "all" or "single_batch_conv" in __getattr__("codegen_compiler_optimization")) else False if name == "CONFIG_MULTI_TILE_CONV": - return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "multi_tile_conv" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False + return True if (__getattr__("codegen_compiler_optimization") == "all" or "multi_tile_conv" in __getattr__("codegen_compiler_optimization")) else False if name == "CONFIG_SUBTILE": - return True if (__getattr__("CONFIG_COMPILER_OPTIMIZATION") == "all" or "subtile" in __getattr__("CONFIG_COMPILER_OPTIMIZATION")) else False + return True if (__getattr__("codegen_compiler_optimization") == "all" or "subtile" in __getattr__("codegen_compiler_optimization")) else False + + if name == "CONFIG_TOGSIM_DEBUG_LEVEL": + return os.environ.get("TOGSIM_DEBUG_LEVEL", "") + if name == "CONFIG_TORCHSIM_DUMP_PATH": + return os.environ.get('TORCHSIM_DUMP_PATH', default = CONFIG_TORCHSIM_DIR) + if name == "CONFIG_TORCHSIM_LOG_PATH": + return os.environ.get('TORCHSIM_DUMP_LOG_PATH', default = os.path.join(CONFIG_TORCHSIM_DIR, "outputs", datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))) + + if name == "CONFIG_TOGSIM_EAGER_MODE": + return int(os.environ.get("TOGSIM_EAGER_MODE", default=False)) # SRAM Buffer allocation plan def load_plan_from_module(module_path): diff --git a/PyTorchSimFrontend/mlir/mlir_codegen_backend.py b/PyTorchSimFrontend/mlir/mlir_codegen_backend.py index 2732c25b..053879d7 100644 --- a/PyTorchSimFrontend/mlir/mlir_codegen_backend.py +++ b/PyTorchSimFrontend/mlir/mlir_codegen_backend.py @@ -1627,7 +1627,7 @@ def make_choices(self, nodes, kernel_name): def autotune(self, *args): def get_cycle(choice): bench_runner = choice[0] - for n_try in range(extension_config.CONFIG_MAX_AUTOTUNE_TRY): # TODO: make simple + for n_try in range(extension_config.codegen_autotune_max_retry): # TODO: make simple try: out = bench_runner() return out[-1] @@ -1664,7 +1664,7 @@ def run_bench(self, nodes, kernel_name, src_code): "spad_info": self.spad_info, "vlen" : self.vlen, "arg_attributes" : arg_attributes, - "validate" : extension_config.CONFIG_TORCHSIM_FUNCTIONAL_MODE, + "validate" : extension_config.pytorchsim_functional_mode, "autotune" : True, }, source_code=src_code, @@ -1683,7 +1683,7 @@ def _log_autotune_result(self, best_choice, best_cycle): def codegen_nodes(self, nodes, kernel_name): src_code = super().codegen_nodes(nodes, kernel_name) self._prepare_simulator_headers(src_code) - if extension_config.CONFIG_MAPPING_POLICY == "autotune" and extension_config.CONFIG_TORCHSIM_TIMING_MODE: + if extension_config.CONFIG_MAPPING_POLICY == "autotune" and extension_config.pytorchsim_timing_mode: optimal_src_code = self.autotune(nodes, kernel_name)[0] if optimal_src_code is not None: return optimal_src_code diff --git a/PyTorchSimFrontend/mlir/mlir_common.py 
b/PyTorchSimFrontend/mlir/mlir_common.py index c655dde3..4d33eea4 100644 --- a/PyTorchSimFrontend/mlir/mlir_common.py +++ b/PyTorchSimFrontend/mlir/mlir_common.py @@ -567,11 +567,11 @@ def set_tile_info(self, tile_desc : MLIRMultiDimTile): class BaseMLIRHardwareInfo(): def __init__(self): # Default HW setting - self.vector_lane = extension_config.CONFIG_VECTOR_LANE + self.vector_lane = extension_config.vpu_num_lanes self.spad_info = extension_config.CONFIG_SPAD_INFO self.precision = extension_config.CONFIG_PRECISION self.num_cores = extension_config.CONFIG_NUM_CORES - self.vlen = extension_config.CONFIG_VLEN + self.vlen = extension_config.vpu_vector_length_bits class BaseMLIRKernel(common.Kernel, BaseMLIRHardwareInfo): newvar_prefix = "%" @@ -700,7 +700,7 @@ def extract_dividers(self, implicit_ops): def compute_tile_size(self, nodes, vars, reduction_vars): vlane_split_axis = len(vars) - 1 - vlane_stride = extension_config.CONFIG_VECTOR_LANE_STRIDE + vlane_stride = 2 # Set minimum vlane stride # Set initial tile size & vector lane mapping if self.kernel_group.tile_desc is None: diff --git a/PyTorchSimFrontend/mlir/mlir_conv_common.py b/PyTorchSimFrontend/mlir/mlir_conv_common.py index 77826730..a1a9d935 100644 --- a/PyTorchSimFrontend/mlir/mlir_conv_common.py +++ b/PyTorchSimFrontend/mlir/mlir_conv_common.py @@ -93,7 +93,7 @@ def outer_func_render(self, kernel_name, input_args): OUTPUT=Y, PADDING_H=self.padding[0], PADDING_W=self.padding[1], - VALIDATION_MODE=extension_config.CONFIG_TORCHSIM_FUNCTIONAL_MODE, + VALIDATION_MODE=extension_config.pytorchsim_functional_mode, TOGSIM_EAGER_MODE=eager_mode, input_reorder=self.input_reorder ) diff --git a/PyTorchSimFrontend/mlir/mlir_lowering.py b/PyTorchSimFrontend/mlir/mlir_lowering.py index af59d88f..ebf0c80e 100644 --- a/PyTorchSimFrontend/mlir/mlir_lowering.py +++ b/PyTorchSimFrontend/mlir/mlir_lowering.py @@ -110,7 +110,7 @@ def convolution( mlir_template = MLIRConvSingleBatchTemplate([x, weight, bias], layout, **kwargs) elif BATCH == 1 and stride[0] != 1 and extension_config.CONFIG_SINGLE_BATCH_CONV: mlir_template = MLIRConvSingleBatchStridedTemplate([x, weight, bias], layout, **kwargs) - elif I_C < extension_config.CONFIG_VECTOR_LANE // 8 and extension_config.CONFIG_MULTI_TILE_CONV: # 8 is hard-coded for now. This should be changed to a better heuristic. + elif I_C < extension_config.vpu_num_lanes // 8 and extension_config.CONFIG_MULTI_TILE_CONV: # 8 is hard-coded for now. This should be changed to a better heuristic. 
mlir_template = MLIRConvMultiTileTemplate([x, weight, bias], layout, **kwargs) else: mlir_template = MLIRConvTemplate([x, weight, bias], layout, **kwargs) diff --git a/PyTorchSimFrontend/mlir/mlir_scheduling.py b/PyTorchSimFrontend/mlir/mlir_scheduling.py index 38603319..23be941c 100644 --- a/PyTorchSimFrontend/mlir/mlir_scheduling.py +++ b/PyTorchSimFrontend/mlir/mlir_scheduling.py @@ -257,7 +257,7 @@ def define_kernel(self, src_code, kernel_name, vector_lane, spad_info, loop_size codecache_def.writeline(f"spad_info={spad_info},") codecache_def.writeline(f"origins={origins},") codecache_def.writeline("arg_attributes=arg_attributes,") - codecache_def.writeline(f"vlen={extension_config.CONFIG_VLEN})") + codecache_def.writeline(f"vlen={extension_config.vpu_vector_length_bits})") wrapper.define_kernel(kernel_name, codecache_def.getvalue(), cuda=False) return kernel_name diff --git a/PyTorchSimFrontend/mlir/mlir_template.py b/PyTorchSimFrontend/mlir/mlir_template.py index 50f61a59..3c2fc4d5 100644 --- a/PyTorchSimFrontend/mlir/mlir_template.py +++ b/PyTorchSimFrontend/mlir/mlir_template.py @@ -1232,7 +1232,7 @@ def make_kernel_render( template=self, kwargs=kwargs ) - tile_candidates = self.get_tile_candidates(**kwargs)[:extension_config.CONFIG_AUTOTUNE_TEMPLATE_TOPK] + tile_candidates = self.get_tile_candidates(**kwargs)[:extension_config.codegen_autotune_template_topk] return kernel, tile_candidates, render return MLIRTemplateCaller( diff --git a/README.md b/README.md index 43de104b..1b6b744c 100644 --- a/README.md +++ b/README.md @@ -147,7 +147,7 @@ Simulation consists of three steps If you want to turn off the `SpikeSimulator` for fast simulation, you can set as below. ```bash -export TORCHSIM_FUNCTIONAL_MODE=False +export pytorchsim_functional_mode=False ``` Log contains memory & core stats. ```bash @@ -329,8 +329,8 @@ Last but not least, you must set `l2d_type` and `l2d_config` in the [TOGSim conf You can configure these options using environment variables. ```bash -export TORCHSIM_VECTOR_LANE=128 # vector lane size -export TORCHSIM_VECTOR_LANE_STRIDE=2 # vector lane stride for DMA +export vpu_num_lanes=128 # vector lane size +# the vector lane stride for DMA is now fixed to 2 (see mlir_common.py) export TORCHSIM_DIR=/workspace/PyTorchSim # home directory # Plan which tensor allocated in TPUv4's CMEM @@ -342,7 +342,7 @@ export TORCHSIM_USE_TIMING_POOLING=0 # use lightweight pooling for timing ## TOGSim Configuration ![NPU_Core](./docs/npu_core.jpg) -`TOGSim/configs` directory contains example NPU configuration files in the JSON format. +The `configs` directory contains example NPU configuration files in the JSON format. ``` "num_cores" : 2, // Number of NPU cores "core_freq_mhz" : 940, // Core's frequency (MHz) diff --git a/TOGSim/src/Simulator.cc b/TOGSim/src/Simulator.cc index fa47f23f..857923c5 100644 --- a/TOGSim/src/Simulator.cc +++ b/TOGSim/src/Simulator.cc @@ -17,7 +17,7 @@ Simulator::Simulator(SimulationConfig config) _noc_node_per_core = config.icnt_injection_ports_per_core; char* onnxim_path_env = std::getenv("TORCHSIM_DIR"); std::string onnxim_path = onnxim_path_env != NULL?
- std::string(onnxim_path_env) + "/TOGSim" : std::string("./"); + std::string(onnxim_path_env): std::string("./"); // Create core objects _cores.resize(_n_cores); diff --git a/configs/systolic_ws_128x128_c1_booksim_tpuv2.json b/configs/systolic_ws_128x128_c1_booksim_tpuv2.json index 58519aad..686827dc 100644 --- a/configs/systolic_ws_128x128_c1_booksim_tpuv2.json +++ b/configs/systolic_ws_128x128_c1_booksim_tpuv2.json @@ -3,6 +3,10 @@ "core_freq_mhz" : 700, "core_stats_print_period_cycles" : 10000, + "vpu_num_lanes" : 128, + "vpu_spad_size_kb_per_lane" : 128, + "vpu_vector_length_bits" : 256, + "dram_type" : "ramulator2", "dram_freq_mhz" :700, "dram_channels": 16, @@ -15,5 +19,11 @@ "icnt_type" : "booksim2", "icnt_freq_mhz" : 700, "icnt_injection_ports_per_core" : 16, - "booksim_config_path" : "../configs/booksim2_configs/fly_c16_m16.icnt" + "booksim_config_path" : "../configs/booksim2_configs/fly_c16_m16.icnt", + + "codegen_mapping_strategy" : "autotune", + "codegen_external_mapping_file" : "", + "codegen_autotune_max_retry": 10, + "codegen_autotune_template_topk": 4, + "codegen_compiler_optimization" : "all" } \ No newline at end of file diff --git a/configs/systolic_ws_128x128_c1_booksim_tpuv3.json b/configs/systolic_ws_128x128_c1_booksim_tpuv3.json index d458c90f..1109dc0f 100644 --- a/configs/systolic_ws_128x128_c1_booksim_tpuv3.json +++ b/configs/systolic_ws_128x128_c1_booksim_tpuv3.json @@ -4,6 +4,10 @@ "core_stats_print_period_cycles" : 10000, "num_systolic_array_per_core" : 2, + "vpu_num_lanes" : 128, + "vpu_spad_size_kb_per_lane" : 128, + "vpu_vector_length_bits" : 256, + "dram_type" : "ramulator2", "dram_freq_mhz" : 940, "dram_channels": 16, @@ -15,5 +19,14 @@ "icnt_type" : "booksim2", "icnt_freq_mhz" : 940, "icnt_injection_ports_per_core" : 16, - "booksim_config_path" : "../configs/booksim2_configs/fly_c16_m16.icnt" + "booksim_config_path" : "../configs/booksim2_configs/fly_c16_m16.icnt", + + "pytorchsim_functional_mode" : 1, + "pytorchsim_timing_mode" : 1, + + "codegen_mapping_strategy" : "autotune", + "codegen_external_mapping_file" : "", + "codegen_autotune_max_retry": 10, + "codegen_autotune_template_topk": 4, + "codegen_compiler_optimization" : "all" } diff --git a/configs/systolic_ws_128x128_c1_simple_noc_tpuv2.json b/configs/systolic_ws_128x128_c1_simple_noc_tpuv2.json index d2e5790e..22aedcf8 100644 --- a/configs/systolic_ws_128x128_c1_simple_noc_tpuv2.json +++ b/configs/systolic_ws_128x128_c1_simple_noc_tpuv2.json @@ -3,6 +3,10 @@ "core_freq_mhz" : 700, "core_stats_print_period_cycles" : 10000, + "vpu_num_lanes" : 128, + "vpu_spad_size_kb_per_lane" : 128, + "vpu_vector_length_bits" : 256, + "dram_type" : "ramulator2", "dram_freq_mhz" : 700, "dram_channels": 32, @@ -14,5 +18,14 @@ "icnt_type" : "simple", "icnt_latency_cycles" : 10, "icnt_freq_mhz" : 700, - "icnt_injection_ports_per_core" : 16 + "icnt_injection_ports_per_core" : 16, + + "pytorchsim_functional_mode" : 1, + "pytorchsim_timing_mode" : 1, + + "codegen_mapping_strategy" : "autotune", + "codegen_external_mapping_file" : "", + "codegen_autotune_max_retry": 10, + "codegen_autotune_template_topk": 4, + "codegen_compiler_optimization" : "all" } \ No newline at end of file diff --git a/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json b/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json index 828e44ca..c9763e92 100644 --- a/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json +++ b/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json @@ -4,6 +4,10 @@ "core_stats_print_period_cycles" : 10000, 
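The same block of keys is stamped into every config file in the hunks around this point: three `vpu_*` hardware keys, the two `pytorchsim_*` mode flags, and five `codegen_*` options. Since a config missing any of them would only fail at first access inside `__getattr__`, a small sanity check can catch that early; the key list is copied from these hunks, while the script itself is just a sketch:
```python
import json
import sys

REQUIRED_KEYS = (
    "vpu_num_lanes", "vpu_spad_size_kb_per_lane", "vpu_vector_length_bits",
    "pytorchsim_functional_mode", "pytorchsim_timing_mode",
    "codegen_mapping_strategy", "codegen_external_mapping_file",
    "codegen_autotune_max_retry", "codegen_autotune_template_topk",
    "codegen_compiler_optimization",
)

# Usage: python check_config.py configs/<name>.json
with open(sys.argv[1]) as f:
    cfg = json.load(f)
missing = [key for key in REQUIRED_KEYS if key not in cfg]
print("missing keys:", ", ".join(missing) if missing else "none")
```
Run against `configs/systolic_ws_128x128_c1_booksim_tpuv2.json`, for example, it would flag that this file gains the `codegen_*` keys in its hunk but not the `pytorchsim_*` flags.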
"num_systolic_array_per_core" : 2, + "vpu_num_lanes" : 128, + "vpu_spad_size_kb_per_lane" : 128, + "vpu_vector_length_bits" : 256, + "dram_type" : "ramulator2", "dram_freq_mhz" : 940, "dram_channels": 16, @@ -15,5 +19,14 @@ "icnt_type" : "simple", "icnt_latency_cycles" : 10, "icnt_freq_mhz" : 940, - "icnt_injection_ports_per_core" : 16 + "icnt_injection_ports_per_core" : 16, + + "pytorchsim_functional_mode" : 1, + "pytorchsim_timing_mode" : 1, + + "codegen_mapping_strategy" : "autotune", + "codegen_external_mapping_file" : "", + "codegen_autotune_max_retry": 10, + "codegen_autotune_template_topk": 4, + "codegen_compiler_optimization" : "all" } \ No newline at end of file diff --git a/configs/systolic_ws_128x128_c1_simple_noc_tpuv3_half.json b/configs/systolic_ws_128x128_c1_simple_noc_tpuv3_half.json index 292967ac..980bfc73 100644 --- a/configs/systolic_ws_128x128_c1_simple_noc_tpuv3_half.json +++ b/configs/systolic_ws_128x128_c1_simple_noc_tpuv3_half.json @@ -4,6 +4,10 @@ "core_stats_print_period_cycles" : 10000, "num_systolic_array_per_core" : 2, + "vpu_num_lanes" : 128, + "vpu_spad_size_kb_per_lane" : 128, + "vpu_vector_length_bits" : 256, + "dram_type" : "ramulator2", "dram_freq_mhz" : 940, "dram_channels": 8, @@ -15,5 +19,14 @@ "icnt_type" : "simple", "icnt_latency_cycles" : 10, "icnt_freq_mhz" : 940, - "icnt_injection_ports_per_core" : 16 + "icnt_injection_ports_per_core" : 16, + + "pytorchsim_functional_mode" : 1, + "pytorchsim_timing_mode" : 1, + + "codegen_mapping_strategy" : "autotune", + "codegen_external_mapping_file" : "", + "codegen_autotune_max_retry": 10, + "codegen_autotune_template_topk": 4, + "codegen_compiler_optimization" : "all" } \ No newline at end of file diff --git a/configs/systolic_ws_128x128_c1_simple_noc_tpuv4.json b/configs/systolic_ws_128x128_c1_simple_noc_tpuv4.json index 01156589..02bfd75c 100644 --- a/configs/systolic_ws_128x128_c1_simple_noc_tpuv4.json +++ b/configs/systolic_ws_128x128_c1_simple_noc_tpuv4.json @@ -4,6 +4,10 @@ "core_stats_print_period_cycles" : 10000, "num_systolic_array_per_core" : 4, + "vpu_num_lanes" : 128, + "vpu_spad_size_kb_per_lane" : 128, + "vpu_vector_length_bits" : 256, + "dram_type" : "ramulator2", "dram_freq_mhz" :1200, "dram_channels": 16, @@ -17,5 +21,14 @@ "icnt_type" : "simple", "icnt_latency_cycles" : 10, "icnt_freq_mhz" : 1050, - "icnt_injection_ports_per_core" : 16 + "icnt_injection_ports_per_core" : 16, + + "pytorchsim_functional_mode" : 1, + "pytorchsim_timing_mode" : 1, + + "codegen_mapping_strategy" : "autotune", + "codegen_external_mapping_file" : "", + "codegen_autotune_max_retry": 10, + "codegen_autotune_template_topk": 4, + "codegen_compiler_optimization" : "all" } \ No newline at end of file diff --git a/configs/systolic_ws_128x128_c2_booksim_tpuv3.json b/configs/systolic_ws_128x128_c2_booksim_tpuv3.json index 271e7e1c..66566324 100644 --- a/configs/systolic_ws_128x128_c2_booksim_tpuv3.json +++ b/configs/systolic_ws_128x128_c2_booksim_tpuv3.json @@ -4,6 +4,10 @@ "core_stats_print_period_cycles" : 10000, "num_systolic_array_per_core" : 2, + "vpu_num_lanes" : 128, + "vpu_spad_size_kb_per_lane" : 128, + "vpu_vector_length_bits" : 256, + "dram_type" : "ramulator2", "dram_freq_mhz" : 940, "dram_channels": 32, @@ -15,5 +19,14 @@ "icnt_type" : "booksim2", "icnt_freq_mhz" : 940, "icnt_injection_ports_per_core" : 16, - "booksim_config_path" : "../configs/booksim2_configs/fly_c32_m32.icnt" + "booksim_config_path" : "../configs/booksim2_configs/fly_c32_m32.icnt", + + "pytorchsim_functional_mode" : 1, + 
"pytorchsim_timing_mode" : 1, + + "codegen_mapping_strategy" : "autotune", + "codegen_external_mapping_file" : "", + "codegen_autotune_max_retry": 10, + "codegen_autotune_template_topk": 4, + "codegen_compiler_optimization" : "all" } diff --git a/configs/systolic_ws_128x128_c2_booksim_tpuv3_bw_quarter.json b/configs/systolic_ws_128x128_c2_booksim_tpuv3_bw_quarter.json index 7cc113e6..8ef47e87 100644 --- a/configs/systolic_ws_128x128_c2_booksim_tpuv3_bw_quarter.json +++ b/configs/systolic_ws_128x128_c2_booksim_tpuv3_bw_quarter.json @@ -5,6 +5,10 @@ "core_print_interval" : 10000, "num_systolic_array_per_core" : 2, + "vpu_num_lanes" : 128, + "vpu_spad_size_kb_per_lane" : 128, + "vpu_vector_length_bits" : 256, + "dram_type" : "ramulator2", "dram_freq" : 940, "dram_channels": 8, @@ -26,5 +30,14 @@ "partition": { "core_0":0, "core_1":0 - } + }, + + "pytorchsim_functional_mode" : 1, + "pytorchsim_timing_mode" : 1, + + "codegen_mapping_strategy" : "autotune", + "codegen_external_mapping_file" : "", + "codegen_autotune_max_retry": 10, + "codegen_autotune_template_topk": 4, + "codegen_compiler_optimization" : "all" } \ No newline at end of file diff --git a/configs/systolic_ws_128x128_c2_chiplet_tpuv3.json b/configs/systolic_ws_128x128_c2_chiplet_tpuv3.json index 6561ffc0..ecd671bf 100644 --- a/configs/systolic_ws_128x128_c2_chiplet_tpuv3.json +++ b/configs/systolic_ws_128x128_c2_chiplet_tpuv3.json @@ -4,6 +4,10 @@ "core_stats_print_period_cycles" : 10000, "num_systolic_array_per_core" : 2, + "vpu_num_lanes" : 128, + "vpu_spad_size_kb_per_lane" : 128, + "vpu_vector_length_bits" : 256, + "dram_type" : "ramulator2", "dram_freq_mhz" : 940, "dram_channels": 32, @@ -17,5 +21,14 @@ "icnt_freq_mhz" : 1000, "icnt_injection_ports_per_core" : 16, "booksim_config_path" : "../configs/booksim2_configs/chiplet_32_32_2.icnt", - "icnt_stats_print_period_cycles" : 10000 + "icnt_stats_print_period_cycles" : 10000, + + "pytorchsim_functional_mode" : 1, + "pytorchsim_timing_mode" : 1, + + "codegen_mapping_strategy" : "autotune", + "codegen_external_mapping_file" : "", + "codegen_autotune_max_retry": 10, + "codegen_autotune_template_topk": 4, + "codegen_compiler_optimization" : "all" } \ No newline at end of file diff --git a/configs/systolic_ws_128x128_c2_chiplet_tpuv3_xnuma.json b/configs/systolic_ws_128x128_c2_chiplet_tpuv3_xnuma.json index fad63cc3..168fbe3a 100644 --- a/configs/systolic_ws_128x128_c2_chiplet_tpuv3_xnuma.json +++ b/configs/systolic_ws_128x128_c2_chiplet_tpuv3_xnuma.json @@ -4,6 +4,10 @@ "core_stats_print_period_cycles" : 10000, "num_systolic_array_per_core" : 2, + "vpu_num_lanes" : 128, + "vpu_spad_size_kb_per_lane" : 128, + "vpu_vector_length_bits" : 256, + "dram_type" : "ramulator2", "dram_freq_mhz" : 940, "dram_channels": 32, @@ -16,5 +20,14 @@ "icnt_type" : "booksim2", "icnt_freq_mhz" : 1000, "icnt_injection_ports_per_core" : 16, - "booksim_config_path" : "../configs/booksim2_configs/chiplet_32_32_2.icnt" + "booksim_config_path" : "../configs/booksim2_configs/chiplet_32_32_2.icnt", + + "pytorchsim_functional_mode" : 1, + "pytorchsim_timing_mode" : 1, + + "codegen_mapping_strategy" : "autotune", + "codegen_external_mapping_file" : "", + "codegen_autotune_max_retry": 10, + "codegen_autotune_template_topk": 4, + "codegen_compiler_optimization" : "all" } \ No newline at end of file diff --git a/configs/systolic_ws_128x128_c2_simple_noc_tpuv2.json b/configs/systolic_ws_128x128_c2_simple_noc_tpuv2.json index 89847917..cb1f7224 100644 --- a/configs/systolic_ws_128x128_c2_simple_noc_tpuv2.json +++ 
b/configs/systolic_ws_128x128_c2_simple_noc_tpuv2.json @@ -3,6 +3,10 @@ "core_freq_mhz" : 700, "core_stats_print_period_cycles" : 10000, + "vpu_num_lanes" : 128, + "vpu_spad_size_kb_per_lane" : 128, + "vpu_vector_length_bits" : 256, + "dram_type" : "ramulator2", "dram_freq_mhz" :700, "dram_channels": 32, @@ -14,5 +18,14 @@ "icnt_type" : "simple", "icnt_latency_cycles" : 10, "icnt_freq_mhz" : 700, - "icnt_injection_ports_per_core" : 16 + "icnt_injection_ports_per_core" : 16, + + "pytorchsim_functional_mode" : 1, + "pytorchsim_timing_mode" : 1, + + "codegen_mapping_strategy" : "autotune", + "codegen_external_mapping_file" : "", + "codegen_autotune_max_retry": 10, + "codegen_autotune_template_topk": 4, + "codegen_compiler_optimization" : "all" } \ No newline at end of file diff --git a/configs/systolic_ws_128x128_c2_simple_noc_tpuv3.json b/configs/systolic_ws_128x128_c2_simple_noc_tpuv3.json index 593c78f2..cd3dd343 100644 --- a/configs/systolic_ws_128x128_c2_simple_noc_tpuv3.json +++ b/configs/systolic_ws_128x128_c2_simple_noc_tpuv3.json @@ -4,6 +4,10 @@ "core_stats_print_period_cycles" : 10000, "num_systolic_array_per_core" : 2, + "vpu_num_lanes" : 128, + "vpu_spad_size_kb_per_lane" : 128, + "vpu_vector_length_bits" : 256, + "dram_type" : "ramulator2", "dram_freq_mhz" : 940, "dram_channels": 32, @@ -15,5 +19,14 @@ "icnt_type" : "simple", "icnt_latency_cycles" : 10, "icnt_freq_mhz" : 940, - "icnt_injection_ports_per_core" : 16 + "icnt_injection_ports_per_core" : 16, + + "pytorchsim_functional_mode" : 1, + "pytorchsim_timing_mode" : 1, + + "codegen_mapping_strategy" : "autotune", + "codegen_external_mapping_file" : "", + "codegen_autotune_max_retry": 10, + "codegen_autotune_template_topk": 4, + "codegen_compiler_optimization" : "all" } \ No newline at end of file diff --git a/configs/systolic_ws_128x128_c2_simple_noc_tpuv3_partition.json b/configs/systolic_ws_128x128_c2_simple_noc_tpuv3_partition.json index bd6cb071..681ef884 100644 --- a/configs/systolic_ws_128x128_c2_simple_noc_tpuv3_partition.json +++ b/configs/systolic_ws_128x128_c2_simple_noc_tpuv3_partition.json @@ -4,6 +4,10 @@ "core_stats_print_period_cycles" : 10000, "num_systolic_array_per_core" : 2, + "vpu_num_lanes" : 128, + "vpu_spad_size_kb_per_lane" : 128, + "vpu_vector_length_bits" : 256, + "dram_type" : "ramulator2", "dram_freq_mhz" : 940, "dram_channels": 32, @@ -21,5 +25,14 @@ "partition": { "core_0":0, "core_1":1 - } + }, + + "pytorchsim_functional_mode" : 1, + "pytorchsim_timing_mode" : 1, + + "codegen_mapping_strategy" : "autotune", + "codegen_external_mapping_file" : "", + "codegen_autotune_max_retry": 10, + "codegen_autotune_template_topk": 4, + "codegen_compiler_optimization" : "all" } diff --git a/configs/systolic_ws_128x128_c2_simple_noc_tpuv4.json b/configs/systolic_ws_128x128_c2_simple_noc_tpuv4.json index f8b0fb95..d09228a1 100644 --- a/configs/systolic_ws_128x128_c2_simple_noc_tpuv4.json +++ b/configs/systolic_ws_128x128_c2_simple_noc_tpuv4.json @@ -4,6 +4,10 @@ "core_stats_print_period_cycles" : 10000, "num_systolic_array_per_core" : 4, + "vpu_num_lanes" : 128, + "vpu_spad_size_kb_per_lane" : 128, + "vpu_vector_length_bits" : 256, + "dram_type" : "ramulator2", "dram_freq_mhz" :1200, "dram_channels": 32, @@ -17,5 +21,14 @@ "icnt_type" : "simple", "icnt_latency_cycles" : 10, "icnt_freq_mhz" : 1050, - "icnt_injection_ports_per_core" : 16 + "icnt_injection_ports_per_core" : 16, + + "pytorchsim_functional_mode" : 1, + "pytorchsim_timing_mode" : 1, + + "codegen_mapping_strategy" : "autotune", + 
"codegen_external_mapping_file" : "", + "codegen_autotune_max_retry": 10, + "codegen_autotune_template_topk": 4, + "codegen_compiler_optimization" : "all" } \ No newline at end of file diff --git a/experiments/BERT.py b/experiments/BERT.py index 147ce7cf..3311682c 100644 --- a/experiments/BERT.py +++ b/experiments/BERT.py @@ -51,7 +51,7 @@ def run_BERT(size, input_seq, config): os.environ['TORCHSIM_DUMP_PATH'] = result_path # only timing simulation os.environ['TORCHSIM_VALIDATION_MODE'] = "0" - if 'TORCHSIM_FUNCTIONAL_MODE' in os.environ: - del os.environ['TORCHSIM_FUNCTIONAL_MODE'] + if 'pytorchsim_functional_mode' in os.environ: + del os.environ['pytorchsim_functional_mode'] run_BERT(size, input_seq, config) diff --git a/experiments/artifact/speedup/run_speedup.sh b/experiments/artifact/speedup/run_speedup.sh index 2b9625e9..9a19e9af 100755 --- a/experiments/artifact/speedup/run_speedup.sh +++ b/experiments/artifact/speedup/run_speedup.sh @@ -1,6 +1,6 @@ #!/bin/bash LOG_DIR=$TORCHSIM_DIR/experiments/artifact/logs -CONFIG_DIR="$TORCHSIM_DIR/TOGSim/configs" +CONFIG_DIR="$TORCHSIM_DIR/configs" SIMULATOR_BIN="$TORCHSIM_DIR/TOGSim/build/bin/Simulator" configs=( diff --git a/experiments/attention.py b/experiments/attention.py index 0dd36210..bbd2734e 100644 --- a/experiments/attention.py +++ b/experiments/attention.py @@ -50,7 +50,7 @@ def attention(query, key, value): os.environ['TORCHSIM_DUMP_PATH'] = result_path # only timing simulation os.environ['TORCHSIM_VALIDATION_MODE'] = "0" - if 'TORCHSIM_FUNCTIONAL_MODE' in os.environ: - del os.environ['TORCHSIM_FUNCTIONAL_MODE'] + if 'pytorchsim_functional_mode' in os.environ: + del os.environ['pytorchsim_functional_mode'] run_attention(size, config) diff --git a/experiments/conv.py b/experiments/conv.py index ecfcabce..f439c5e3 100644 --- a/experiments/conv.py +++ b/experiments/conv.py @@ -51,7 +51,7 @@ def custom_conv2d(a, b, bias): os.environ['TORCHSIM_DUMP_PATH'] = result_path # only timing simulation os.environ['TORCHSIM_VALIDATION_MODE'] = "0" - if 'TORCHSIM_FUNCTIONAL_MODE' in os.environ: - del os.environ['TORCHSIM_FUNCTIONAL_MODE'] + if 'pytorchsim_functional_mode' in os.environ: + del os.environ['pytorchsim_functional_mode'] run_conv2d(size[0], size[1], size[2], size[3], size[4], size[5], size[6], size[7], config) \ No newline at end of file diff --git a/experiments/gemm.py b/experiments/gemm.py index 02e650bd..e92200d1 100644 --- a/experiments/gemm.py +++ b/experiments/gemm.py @@ -45,8 +45,8 @@ def custom_matmul(a, b): os.environ['TORCHSIM_DUMP_PATH'] = result_path # only timing simulation os.environ['TORCHSIM_VALIDATION_MODE'] = "0" - if 'TORCHSIM_FUNCTIONAL_MODE' in os.environ: - del os.environ['TORCHSIM_FUNCTIONAL_MODE'] + if 'pytorchsim_functional_mode' in os.environ: + del os.environ['pytorchsim_functional_mode'] from Scheduler.scheduler import PyTorchSimRunner module = PyTorchSimRunner.setup_device() diff --git a/experiments/layernorm.py b/experiments/layernorm.py index 02a5e0ea..74b6d286 100644 --- a/experiments/layernorm.py +++ b/experiments/layernorm.py @@ -42,7 +42,7 @@ def run_layernorm(size, config): os.environ['TORCHSIM_FUSION_REDUCTION_REDUCTION'] = "0" # only timing simulation os.environ['TORCHSIM_VALIDATION_MODE'] = "0" - if 'TORCHSIM_FUNCTIONAL_MODE' in os.environ: - del os.environ['TORCHSIM_FUNCTIONAL_MODE'] + if 'pytorchsim_functional_mode' in os.environ: + del os.environ['pytorchsim_functional_mode'] run_layernorm(size, config) diff --git a/experiments/resnet18.py b/experiments/resnet18.py index 4f56dab2..45311d59 
100644 --- a/experiments/resnet18.py +++ b/experiments/resnet18.py @@ -43,7 +43,7 @@ def run_resnet(batch, config): os.environ['TORCHSIM_USE_TIMING_POOLING'] = "1" # only timing simulation os.environ['TORCHSIM_VALIDATION_MODE'] = "0" - if 'TORCHSIM_FUNCTIONAL_MODE' in os.environ: - del os.environ['TORCHSIM_FUNCTIONAL_MODE'] + if 'pytorchsim_functional_mode' in os.environ: + del os.environ['pytorchsim_functional_mode'] run_resnet(batch, config) diff --git a/experiments/resnet50.py b/experiments/resnet50.py index 9d5ba025..4f03ea15 100644 --- a/experiments/resnet50.py +++ b/experiments/resnet50.py @@ -43,7 +43,7 @@ def run_resnet(batch, config): os.environ['TORCHSIM_USE_TIMING_POOLING'] = "1" # only timing simulation os.environ['TORCHSIM_VALIDATION_MODE'] = "0" - if 'TORCHSIM_FUNCTIONAL_MODE' in os.environ: - del os.environ['TORCHSIM_FUNCTIONAL_MODE'] + if 'pytorchsim_functional_mode' in os.environ: + del os.environ['pytorchsim_functional_mode'] run_resnet(batch, config) diff --git a/experiments/softmax.py b/experiments/softmax.py index 66cd8779..b47bd685 100644 --- a/experiments/softmax.py +++ b/experiments/softmax.py @@ -41,7 +41,7 @@ def run_softmax(size, config, dim=1): os.environ['TORCHSIM_DUMP_PATH'] = result_path # only timing simulation os.environ['TORCHSIM_VALIDATION_MODE'] = "0" - if 'TORCHSIM_FUNCTIONAL_MODE' in os.environ: - del os.environ['TORCHSIM_FUNCTIONAL_MODE'] + if 'pytorchsim_functional_mode' in os.environ: + del os.environ['pytorchsim_functional_mode'] run_softmax(size, config) diff --git a/tutorial/session1/CompilerOptimization.ipynb b/tutorial/session1/CompilerOptimization.ipynb index 6ea98cc7..d45aa857 100644 --- a/tutorial/session1/CompilerOptimization.ipynb +++ b/tutorial/session1/CompilerOptimization.ipynb @@ -18,7 +18,7 @@ "import sys\n", "base_dir = os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim')\n", "sys.path.append(base_dir)\n", - "os.environ['TORCHSIM_FUNCTIONAL_MODE']=\"0\"\n", + "os.environ['pytorchsim_functional_mode']=\"0\"\n", "os.environ['TORCHSIM_TIMING_MODE']=\"1\"" ] }, diff --git a/tutorial/session1/ExecutionMode.ipynb b/tutorial/session1/ExecutionMode.ipynb index a18ed90e..12706edb 100644 --- a/tutorial/session1/ExecutionMode.ipynb +++ b/tutorial/session1/ExecutionMode.ipynb @@ -36,8 +36,8 @@ "from Scheduler.scheduler import PyTorchSimRunner\n", "device = PyTorchSimRunner.setup_device().custom_device()\n", "\n", - "os.environ['TORCHSIM_FUNCTIONAL_MODE']=\"1\"\n", - "os.environ['TORCHSIM_TIMING_MODE']=\"1\"\n", + "os.environ['pytorchsim_functional_mode']=\"1\"\n", + "os.environ['pytorchsim_timing_mode']=\"1\"\n", "\n", "input = torch.randn(1024, 1024).to(device=device)\n", "weight = torch.randn(1024, 1024).to(device=device)\n", @@ -59,8 +59,8 @@ "metadata": {}, "outputs": [], "source": [ - "os.environ['TORCHSIM_FUNCTIONAL_MODE']=\"1\"\n", - "os.environ['TORCHSIM_TIMING_MODE']=\"0\"\n", + "os.environ['pytorchsim_functional_mode']=\"1\"\n", + "os.environ['pytorchsim_timing_mode']=\"0\"\n", "\n", "input = torch.randn(1024, 1024).to(device=device)\n", "weight = torch.randn(1024, 1024).to(device=device)\n", @@ -82,8 +82,8 @@ "metadata": {}, "outputs": [], "source": [ - "os.environ['TORCHSIM_FUNCTIONAL_MODE']=\"0\"\n", - "os.environ['TORCHSIM_TIMING_MODE']=\"1\"\n", + "os.environ['pytorchsim_functional_mode']=\"0\"\n", + "os.environ['pytorchsim_timing_mode']=\"1\"\n", "\n", "input = torch.randn(1024, 1024).to(device=device)\n", "weight = torch.randn(1024, 1024).to(device=device)\n", diff --git a/tutorial/session1/LogAnalysis.ipynb 
b/tutorial/session1/LogAnalysis.ipynb index 24732dda..38846216 100644 --- a/tutorial/session1/LogAnalysis.ipynb +++ b/tutorial/session1/LogAnalysis.ipynb @@ -18,8 +18,8 @@ "import sys\n", "base_dir = os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim')\n", "sys.path.append(base_dir)\n", - "os.environ['TORCHSIM_FUNCTIONAL_MODE']=\"0\"\n", - "os.environ['TORCHSIM_TIMING_MODE']=\"1\"" + "os.environ['pytorchsim_functional_mode']=\"0\"\n", + "os.environ['pytorchsim_timing_mode']=\"1\"" ] }, {
From cf5b3a73a9373d3d42a600d501288952ee3e773d Mon Sep 17 00:00:00 2001 From: Wonhyuk Yang Date: Tue, 2 Dec 2025 22:14:06 +0000 Subject: [PATCH 13/21] [Tutorial] Hands-on 2 session --- .../mlir/mlir_codegen_backend.py | 9 ++++++- tutorial/session2/Warmup.py | 27 +++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 tutorial/session2/Warmup.py diff --git a/PyTorchSimFrontend/mlir/mlir_codegen_backend.py b/PyTorchSimFrontend/mlir/mlir_codegen_backend.py index 053879d7..7a3fae82 100644 --- a/PyTorchSimFrontend/mlir/mlir_codegen_backend.py +++ b/PyTorchSimFrontend/mlir/mlir_codegen_backend.py @@ -426,7 +426,14 @@ def exp(operand, *args, var_info=None, **kwargs): @staticmethod def exp2(operand, *args, var_info=None, **kwargs): - raise NotImplementedError() + # Hands-on part: implement exp2 using math.exp2 + # var_info = {operand: [tile_size, dtype]} + # Ex) var_info[operand] = [8, "f32"] + + ln2 = math.log(2) + coeff = ops.constant(ln2, "f32") + operand = ops.mul(operand, coeff) + return ops.exp(operand), var_info[operand] @staticmethod def erf(operand, *args, var_info=None, **kwargs): diff --git a/tutorial/session2/Warmup.py b/tutorial/session2/Warmup.py new file mode 100644 index 00000000..ce215cf5 --- /dev/null +++ b/tutorial/session2/Warmup.py @@ -0,0 +1,27 @@ +from typing import List +import os +from torch.fx.passes.graph_drawer import FxGraphDrawer +os.environ['TORCH_LOGS'] = 'bytecode' +import torch + +def dummy_compiler(gm: torch.fx.GraphModule, _): + gm.graph.print_tabular() + drawer = FxGraphDrawer(gm, "my_model") + drawer.get_dot_graph().write_svg("fx_graph.svg") + return gm.forward # Return a callable object + +class MyModel(torch.nn.Module): + def forward(self, x, y): + z = torch.matmul(x, y) + return torch.relu(z) + +@torch.compile(backend=dummy_compiler) +def f(x, y): + my_model = MyModel() + return my_model(x, y) + +if __name__ == "__main__": + x = torch.randn(7, 5, requires_grad=False) + y = torch.randn(5, 3, requires_grad=False) + k = f(x, y) + print(k)
From 0957c5888a75cc775df6be22f045e0468fb32eb4 Mon Sep 17 00:00:00 2001 From: Yunseon Shin Date: Tue, 2 Dec 2025 23:48:01 +0000 Subject: [PATCH 14/21] [refactor] dump folder --- PyTorchSimFrontend/extension_codecache.py | 6 +++--- PyTorchSimFrontend/extension_config.py | 4 +--- PyTorchSimFrontend/mlir/mlir_autotune.py | 4 ++-- Simulator/simulator.py | 3 ++- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/PyTorchSimFrontend/extension_codecache.py b/PyTorchSimFrontend/extension_codecache.py index 65e575d7..4d57b987 100644 --- a/PyTorchSimFrontend/extension_codecache.py +++ b/PyTorchSimFrontend/extension_codecache.py @@ -15,7 +15,7 @@ def hash_prefix(hash_value): return hash_value[1:12] def get_write_path(src_code): - return os.path.join(extension_config.CONFIG_TORCHSIM_DUMP_PATH, "tmp", hash_prefix(get_hash(src_code.strip()))) + return os.path.join(extension_config.CONFIG_TORCHSIM_DUMP_PATH, "outputs", hash_prefix(get_hash(src_code.strip()))) def dump_metadata(args, arg_attributes,
path): meta_path = os.path.join(path, "meta.txt") @@ -267,7 +267,7 @@ def dummy_simulator(*args, **kwargs): lock = FileLock(os.path.join(lock_dir, key + ".lock"), timeout=LOCK_TIMEOUT) with lock: # Run simulator pass - result_path = os.path.join(extension_config.CONFIG_TORCHSIM_DUMP_PATH, "tmp", hash_prefix(key)) + result_path = os.path.join(extension_config.CONFIG_TORCHSIM_DUMP_PATH, "outputs", hash_prefix(key)) # Dump arguments and meta data dump_metadata(args, arg_attributes, result_path) runtime_path = FunctionalSimulator.get_runtime_dump_path(result_path) @@ -297,7 +297,7 @@ def dryrun_simulator(*args, **kwargs): lock = FileLock(os.path.join(lock_dir, key + ".lock"), timeout=LOCK_TIMEOUT) with lock: # Run simulator pass - result_path = os.path.join(extension_config.CONFIG_TORCHSIM_DUMP_PATH, "tmp", hash_prefix(key)) + result_path = os.path.join(extension_config.CONFIG_TORCHSIM_DUMP_PATH, "outputs", hash_prefix(key)) # Dump arguments and meta data dump_metadata(args, arg_attributes, result_path) runtime_path = FunctionalSimulator.get_runtime_dump_path(result_path) diff --git a/PyTorchSimFrontend/extension_config.py b/PyTorchSimFrontend/extension_config.py index b86c3164..5a4b8937 100644 --- a/PyTorchSimFrontend/extension_config.py +++ b/PyTorchSimFrontend/extension_config.py @@ -1,8 +1,6 @@ import os import sys -import tempfile import importlib -import datetime import json CONFIG_TORCHSIM_DIR = os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim') @@ -82,7 +80,7 @@ def __getattr__(name): if name == "CONFIG_TORCHSIM_DUMP_PATH": return os.environ.get('TORCHSIM_DUMP_PATH', default = CONFIG_TORCHSIM_DIR) if name == "CONFIG_TORCHSIM_LOG_PATH": - return os.environ.get('TORCHSIM_DUMP_LOG_PATH', default = os.path.join(CONFIG_TORCHSIM_DIR, "outputs", datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))) + return os.environ.get('TORCHSIM_DUMP_LOG_PATH', default = os.path.join(CONFIG_TORCHSIM_DIR, "togsim_results")) if name == "CONFIG_TOGSIM_EAGER_MODE": return int(os.environ.get("TOGSIM_EAGER_MODE", default=False)) diff --git a/PyTorchSimFrontend/mlir/mlir_autotune.py b/PyTorchSimFrontend/mlir/mlir_autotune.py index e52d6cff..988408ea 100644 --- a/PyTorchSimFrontend/mlir/mlir_autotune.py +++ b/PyTorchSimFrontend/mlir/mlir_autotune.py @@ -21,7 +21,7 @@ def hash_prefix(hash_value): return hash_value[1:12] def get_write_path(src_code): - return os.path.join(extension_config.CONFIG_TORCHSIM_DUMP_PATH, "tmp", hash_prefix(get_hash(src_code.strip()))) + return os.path.join(extension_config.CONFIG_TORCHSIM_DUMP_PATH, "outputs", hash_prefix(get_hash(src_code.strip()))) @dataclasses.dataclass class MLIRBenchmarkRequest(): @@ -58,7 +58,7 @@ def make_run_fn( # Check already cached result. 
write_path = get_write_path(self.source_code) key, _ = write(self.source_code, "mlir", specified_dir=write_path) - result_path = os.path.join(extension_config.CONFIG_TORCHSIM_DUMP_PATH, "tmp", hash_prefix(key), "togsim_result/0") + result_path = os.path.join(extension_config.CONFIG_TORCHSIM_DUMP_PATH, "outputs", hash_prefix(key), "togsim_result/0") if os.path.exists(result_path): result = TOGSimulator.get_result_from_file(result_path) def cached_run_fn(*args, **kwargs): diff --git a/Simulator/simulator.py b/Simulator/simulator.py index 79fc6858..322d9b12 100644 --- a/Simulator/simulator.py +++ b/Simulator/simulator.py @@ -6,6 +6,7 @@ import sys import json import time +import datetime import threading from pathlib import Path @@ -245,7 +246,7 @@ def show_progress(): # Save result to result_path result_path = extension_config.CONFIG_TORCHSIM_LOG_PATH os.makedirs(result_path, exist_ok=True) - file_name = "togsim_result.log" + file_name = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')+".log" result_path = os.path.join(result_path, file_name) with open(result_path, "w") as f: f.write(result.decode()) From 5e26917ed41d82dbb09ffafd723b6900c5896c08 Mon Sep 17 00:00:00 2001 From: Yunseon Shin Date: Tue, 2 Dec 2025 23:49:10 +0000 Subject: [PATCH 15/21] [Tutorial] external mapping --- .../mlir/mlir_codegen_backend.py | 12 ++--- PyTorchSimFrontend/mlir/mlir_gemm_template.py | 47 +++++++------------ PyTorchSimFrontend/mlir/mlir_template.py | 2 +- .../session1/tutorial_external_mapping.json | 7 +++ validation/gemm_tpuv3_cheatsheet.json | 16 +++---- 5 files changed, 39 insertions(+), 45 deletions(-) create mode 100644 tutorial/session1/tutorial_external_mapping.json diff --git a/PyTorchSimFrontend/mlir/mlir_codegen_backend.py b/PyTorchSimFrontend/mlir/mlir_codegen_backend.py index 7a3fae82..3f055791 100644 --- a/PyTorchSimFrontend/mlir/mlir_codegen_backend.py +++ b/PyTorchSimFrontend/mlir/mlir_codegen_backend.py @@ -1690,7 +1690,7 @@ def _log_autotune_result(self, best_choice, best_cycle): def codegen_nodes(self, nodes, kernel_name): src_code = super().codegen_nodes(nodes, kernel_name) self._prepare_simulator_headers(src_code) - if extension_config.CONFIG_MAPPING_POLICY == "autotune" and extension_config.pytorchsim_timing_mode: + if "autotune" in extension_config.codegen_mapping_strategy and extension_config.pytorchsim_timing_mode: optimal_src_code = self.autotune(nodes, kernel_name)[0] if optimal_src_code is not None: return optimal_src_code @@ -1725,7 +1725,7 @@ def get_dma_info(self, name, index, broadcast=True, store_reduction=False, buffe """ # Use loads as default if buffer is None: - buffer = self.applys if "tmp" not in str(index) else self.dma_loads + buffer = self.applys if "outputs" not in str(index) else self.dma_loads # TODO. 
kg_tile_desc = self.kernel_group.tile_desc @@ -1736,7 +1736,7 @@ def get_dma_info(self, name, index, broadcast=True, store_reduction=False, buffe total_dims = [int(str(i)[5:]) for i in self.itervars] local_tile_desc = mlir_common.MLIRMultiDimTile([1], self.vector_lane) local_dims.sort() # Assume that smaller index is placed in the outer loop - indirect_dims = [f"{i}" for i in index.free_symbols if "tmp" in str(i)] + indirect_dims = [f"{i}" for i in index.free_symbols if "outputs" in str(i)] for indirect_dim in indirect_dims: index = index.replace(sympy.Symbol(indirect_dim), 0) @@ -1992,7 +1992,7 @@ def get_mask(self): return mask_shape, mask_var def convert_indirect_indexing(self, index :sympy.Expr): - if "tmp" not in str(index): + if "outputs" not in str(index): return index, None # Note: In case of indirect indexing, dimensions should be divisible by tile size @@ -2003,7 +2003,7 @@ def convert_indirect_indexing(self, index :sympy.Expr): raise mlir_common.RecompileSignal(f"Indirect access (tile size {self.kernel_group.tile_desc.get_tile_size()} is not divisible by {self.ranges})") # Process start - indirect_dims = [str(dim) for dim in index.free_symbols if "tmp" in str(dim)] + indirect_dims = [str(dim) for dim in index.free_symbols if "outputs" in str(dim)] indirect_dims.sort() first_dim = indirect_dims[0] spad_vars = dict() @@ -2051,7 +2051,7 @@ def convert_indirect_indexing(self, index :sympy.Expr): # Apply stride for arg in index.args: - if "tmp" not in str(arg): + if "outputs" not in str(arg): continue if arg.is_Mul and arg.args[0].is_number: coeff_dtype = self.var_info[spad_vars[str(arg.args[1])]][1] diff --git a/PyTorchSimFrontend/mlir/mlir_gemm_template.py b/PyTorchSimFrontend/mlir/mlir_gemm_template.py index 6b504b38..bbc63b45 100644 --- a/PyTorchSimFrontend/mlir/mlir_gemm_template.py +++ b/PyTorchSimFrontend/mlir/mlir_gemm_template.py @@ -297,31 +297,24 @@ def extract_info(self, template_buffer_node, epilogue_nodes, prologue_nodes): return X,W,Y,M,N,K,n_epilogue_node,n_prologue_node,len(n_extra_read) def select_tile(self, kernel, M, N, K, n_extra_node, n_extra_read, n_prologue_node): - # Check cheat sheet - cheatsheet_path = extension_config.CONFIG_GEMM_CHEATSHEET_PATH data = {} - if extension_config.CONFIG_GEMM_CHEATSHEET_PATH is not None: - path = Path(cheatsheet_path) - if path.is_file(): - with path.open("r") as f: - data = json.load(f) - - gemm_shape = f"{M}_{K}_{N}" - if extension_config.CONFIG_MAPPING_POLICY == "manual": + gemm_shape = f"{M}_{N}_{K}" + if "external" in extension_config.codegen_mapping_strategy: # case 1: use manual tile size - TILE_M = extension_config.CONFIG_TILE_M - TILE_N = extension_config.CONFIG_TILE_N - TILE_K = extension_config.CONFIG_TILE_K - tile_candidates = [[TILE_M, TILE_N, TILE_K]] - elif gemm_shape in data: - # case 2: cached tile size + path = Path(extension_config.codegen_external_mapping_file) + with path.open("r") as f: + data = json.load(f) + if gemm_shape in data: tile_info = data[gemm_shape] - TILE_M = tile_info["TILE_M"] - TILE_N = tile_info["TILE_N"] - TILE_K = tile_info["TILE_K"] - tile_candidates = [[TILE_M, TILE_N, TILE_K]] + if len(tile_info) == 3: + TILE_M, TILE_N, TILE_K = tile_info.values() + tile_candidates = [[TILE_M, TILE_N, TILE_K]] + elif len(tile_info) == 6: + TILE_M, TILE_N, TILE_K, SUB_TILE_M, SUB_TILE_N, SUB_TILE_K = tile_info.values() + full_tile_candidates = [[TILE_M, TILE_N, TILE_K, SUB_TILE_M, SUB_TILE_N, SUB_TILE_K]] + return full_tile_candidates else: - # case 3: use gemm_combination_mapping + # case 2: use 
heuristic mapping min_tile = (n_extra_node + n_prologue_node) == 0 tile_candidates = kernel.gemm_combination_mapping(M, N, K, max(n_extra_read-2, 0), n_prologue_node, min_tile=True) @@ -332,24 +325,18 @@ def select_tile(self, kernel, M, N, K, n_extra_node, n_extra_read, n_prologue_no full_tile_candidates = [] for idx, (TILE_M, TILE_N, TILE_K) in enumerate(tile_candidates): - # Calculate Sub Tile Size for fine-grained DMA + # Case 1: calculate sub tile size for fine-grained DMA if extension_config.CONFIG_SUBTILE: - # Case 1: adjust selective fine-grained DMA (SFG-DMA) SUB_TILE_M = TILE_M if (TILE_M < kernel.vector_lane or n_prologue_node) else kernel.vector_lane if (TILE_M == M and TILE_N == N and TILE_N <= 512): SUB_TILE_N = TILE_N if TILE_N < kernel.vector_lane else kernel.vector_lane else: # Avoid Row Conflict of weights SUB_TILE_N = TILE_N SUB_TILE_K = TILE_K - # Case 2: use manual sub tile size (FG-DMA) - if extension_config.CONFIG_MANUAL_SUBTILE_SIZE: - SUB_TILE_M = extension_config.CONFIG_SUBTILE_M - SUB_TILE_N = extension_config.CONFIG_SUBTILE_N - SUB_TILE_K = extension_config.CONFIG_SUBTILE_K - # Case 3: None Subtile + # Case 2: None Subtile else: SUB_TILE_M = TILE_M SUB_TILE_N = TILE_N SUB_TILE_K = TILE_K - full_tile_candidates.append([TILE_M,TILE_N,TILE_K, SUB_TILE_M,SUB_TILE_N,SUB_TILE_K]) + full_tile_candidates.append([TILE_M,TILE_N,TILE_K, SUB_TILE_M, SUB_TILE_N, SUB_TILE_K]) return full_tile_candidates diff --git a/PyTorchSimFrontend/mlir/mlir_template.py b/PyTorchSimFrontend/mlir/mlir_template.py index 3c2fc4d5..e493464a 100644 --- a/PyTorchSimFrontend/mlir/mlir_template.py +++ b/PyTorchSimFrontend/mlir/mlir_template.py @@ -508,7 +508,7 @@ def _log_autotune_result(self, best_choice, best_cycle): ) def codegen_nodes(self, tile_candidates, render, template_node, prologue_nodes, epilogue_nodes): - if extension_config.CONFIG_MAPPING_POLICY == "autotune" and len(tile_candidates): + if "autotune" in extension_config.codegen_mapping_strategy and len(tile_candidates): src_code, loop_size = self.autotune(tile_candidates, render, template_node, prologue_nodes, epilogue_nodes) self.loop_size = loop_size else: diff --git a/tutorial/session1/tutorial_external_mapping.json b/tutorial/session1/tutorial_external_mapping.json new file mode 100644 index 00000000..3982d950 --- /dev/null +++ b/tutorial/session1/tutorial_external_mapping.json @@ -0,0 +1,7 @@ +{ + "1024_1024_1024" : { + "TILE_M" : 512, + "TILE_N" : 512, + "TILE_K" : 512 + } +} \ No newline at end of file diff --git a/validation/gemm_tpuv3_cheatsheet.json b/validation/gemm_tpuv3_cheatsheet.json index 76a26e1a..e7fd0a6f 100644 --- a/validation/gemm_tpuv3_cheatsheet.json +++ b/validation/gemm_tpuv3_cheatsheet.json @@ -1,17 +1,17 @@ { - "512_2048_8192" : { + "512_8192_2048" : { "TILE_M" : 512, - "TILE_K" : 512, - "TILE_N" : 1024 + "TILE_N" : 1024, + "TILE_K" : 512 }, "512_2048_2048" : { "TILE_M" : 512, - "TILE_K" : 512, - "TILE_N" : 1024 + "TILE_N" : 1024, + "TILE_K" : 512 }, - "2048_2048_512" : { + "2048_512_2048" : { "TILE_M" : 1024, - "TILE_K" : 512, - "TILE_N" : 512 + "TILE_N" : 512, + "TILE_K" : 512 } } \ No newline at end of file
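Editor's note on the external-mapping format introduced by PATCH 15: select_tile builds the lookup key as f"{M}_{N}_{K}" and unpacks a matching entry with tile_info.values(), so the JSON field order is significant, and an entry may hold either three values (tile sizes) or six (tiles plus sub-tiles). A minimal sketch that writes such a file from Python; the SUB_TILE_* field names in the six-entry form are an assumption inferred from the len(tile_info) == 6 branch, which checks only the entry count:

import json

# Keys follow select_tile's gemm_shape = f"{M}_{N}_{K}".
# Field order matters: the loader unpacks entries with tile_info.values().
mapping = {
    "1024_1024_1024": {  # three entries: tile sizes only
        "TILE_M": 512, "TILE_N": 512, "TILE_K": 512
    },
    "2048_2048_2048": {  # six entries: tiles plus sub-tiles (assumed field names)
        "TILE_M": 512, "TILE_N": 512, "TILE_K": 512,
        "SUB_TILE_M": 128, "SUB_TILE_N": 128, "SUB_TILE_K": 512
    }
}
with open("my_external_mapping.json", "w") as f:
    json.dump(mapping, f, indent=4)

Pointing codegen_external_mapping_file at the generated path, with a codegen_mapping_strategy containing "external", activates the lookup; shapes missing from the file fall back to the heuristic mapping.

From 83700d5c2cbb9ca4e89694cf056ba67bf0eacb20 Mon Sep 17 00:00:00 2001 From: Yunseon Shin Date: Tue, 2 Dec 2025 23:50:15 +0000 Subject: [PATCH 16/21] [Tutorial] Basic usage --- ...stolic_ws_128x128_c1_simple_noc_tpuv3.json | 2 +- ...stolic_ws_128x128_c2_simple_noc_tpuv2.json | 2 +- ...stolic_ws_128x128_c2_simple_noc_tpuv3.json | 2 +- tutorial/session1/CompilerOptimization.ipynb | 9 +++--- 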
tutorial/session1/ExecutionMode.ipynb | 13 +++----- tutorial/session1/LogAnalysis.ipynb | 4 +-- tutorial/session1/Mapping.ipynb | 7 ++-- .../togsim_configs/togsim_config.json | 32 +++++++++++++++++++ .../togsim_configs/togsim_config_2_cores.json | 32 +++++++++++++++++++ .../togsim_config_autotune.json | 32 +++++++++++++++++++ .../togsim_config_external_mapping.json | 32 +++++++++++++++++++ .../togsim_config_functional_only.json | 32 +++++++++++++++++++ ...ogsim_config_no_compiler_optimization.json | 32 +++++++++++++++++++ .../togsim_config_timing_only.json | 32 +++++++++++++++++++ 14 files changed, 239 insertions(+), 24 deletions(-) create mode 100644 tutorial/session1/togsim_configs/togsim_config.json create mode 100644 tutorial/session1/togsim_configs/togsim_config_2_cores.json create mode 100644 tutorial/session1/togsim_configs/togsim_config_autotune.json create mode 100644 tutorial/session1/togsim_configs/togsim_config_external_mapping.json create mode 100644 tutorial/session1/togsim_configs/togsim_config_functional_only.json create mode 100644 tutorial/session1/togsim_configs/togsim_config_no_compiler_optimization.json create mode 100644 tutorial/session1/togsim_configs/togsim_config_timing_only.json diff --git a/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json b/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json index c9763e92..e8e489d9 100644 --- a/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json +++ b/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json @@ -24,7 +24,7 @@ "pytorchsim_functional_mode" : 1, "pytorchsim_timing_mode" : 1, - "codegen_mapping_strategy" : "autotune", + "codegen_mapping_strategy" : "heuristic", "codegen_external_mapping_file" : "", "codegen_autotune_max_retry": 10, "codegen_autotune_template_topk": 4, diff --git a/configs/systolic_ws_128x128_c2_simple_noc_tpuv2.json b/configs/systolic_ws_128x128_c2_simple_noc_tpuv2.json index cb1f7224..0a5f15b2 100644 --- a/configs/systolic_ws_128x128_c2_simple_noc_tpuv2.json +++ b/configs/systolic_ws_128x128_c2_simple_noc_tpuv2.json @@ -23,7 +23,7 @@ "pytorchsim_functional_mode" : 1, "pytorchsim_timing_mode" : 1, - "codegen_mapping_strategy" : "autotune", + "codegen_mapping_strategy" : "heuristic", "codegen_external_mapping_file" : "", "codegen_autotune_max_retry": 10, "codegen_autotune_template_topk": 4, diff --git a/configs/systolic_ws_128x128_c2_simple_noc_tpuv3.json b/configs/systolic_ws_128x128_c2_simple_noc_tpuv3.json index cd3dd343..f099b93d 100644 --- a/configs/systolic_ws_128x128_c2_simple_noc_tpuv3.json +++ b/configs/systolic_ws_128x128_c2_simple_noc_tpuv3.json @@ -24,7 +24,7 @@ "pytorchsim_functional_mode" : 1, "pytorchsim_timing_mode" : 1, - "codegen_mapping_strategy" : "autotune", + "codegen_mapping_strategy" : "heuristic", "codegen_external_mapping_file" : "", "codegen_autotune_max_retry": 10, "codegen_autotune_template_topk": 4, diff --git a/tutorial/session1/CompilerOptimization.ipynb b/tutorial/session1/CompilerOptimization.ipynb index d45aa857..178974c1 100644 --- a/tutorial/session1/CompilerOptimization.ipynb +++ b/tutorial/session1/CompilerOptimization.ipynb @@ -18,8 +18,7 @@ "import sys\n", "base_dir = os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim')\n", "sys.path.append(base_dir)\n", - "os.environ['pytorchsim_functional_mode']=\"0\"\n", - "os.environ['TORCHSIM_TIMING_MODE']=\"1\"" + "os.environ['TOGSIM_CONFIG']=f\"{base_dir}/tutorial/session1/togsim_configs/togsim_config_timing_only.json\"" ] }, { @@ -35,7 +34,7 @@ "metadata": {}, "outputs": [], "source": [ - 
"os.environ['TORCHSIM_DUMP_PATH']=os.path.join(base_dir, \"fused\")\n", + "os.environ['TORCHSIM_DUMP_PATH']=os.path.join(os.getcwd(), \"fused\")\n", "from Scheduler.scheduler import PyTorchSimRunner\n", "device = PyTorchSimRunner.setup_device().custom_device()\n", "\n", @@ -71,8 +70,8 @@ "metadata": {}, "outputs": [], "source": [ - "os.environ['TORCHSIM_DUMP_PATH']=os.path.join(base_dir, \"non_fused\")\n", - "os.environ['TORCHSIM_COMPILER_OPTIMIZATION']=\"none\"\n", + "os.environ['TORCHSIM_DUMP_PATH']=os.path.join(os.getcwd(), \"non_fused\")\n", + "os.environ['TOGSIM_CONFIG']=f\"{base_dir}/tutorial/session1/togsim_configs/togsim_config_no_compiler_optimization.json\"\n", "\n", "input = torch.randn(1024, 1024).to(device=device)\n", "weight = torch.randn(1024, 1024).to(device=device)\n", diff --git a/tutorial/session1/ExecutionMode.ipynb b/tutorial/session1/ExecutionMode.ipynb index 12706edb..22e00bed 100644 --- a/tutorial/session1/ExecutionMode.ipynb +++ b/tutorial/session1/ExecutionMode.ipynb @@ -36,9 +36,6 @@ "from Scheduler.scheduler import PyTorchSimRunner\n", "device = PyTorchSimRunner.setup_device().custom_device()\n", "\n", - "os.environ['pytorchsim_functional_mode']=\"1\"\n", - "os.environ['pytorchsim_timing_mode']=\"1\"\n", - "\n", "input = torch.randn(1024, 1024).to(device=device)\n", "weight = torch.randn(1024, 1024).to(device=device)\n", "\n", @@ -59,8 +56,7 @@ "metadata": {}, "outputs": [], "source": [ - "os.environ['pytorchsim_functional_mode']=\"1\"\n", - "os.environ['pytorchsim_timing_mode']=\"0\"\n", + "os.environ['TOGSIM_CONFIG']=f\"{base_dir}/tutorial/session1/togsim_configs/togsim_config_functional_only.json\"\n", "\n", "input = torch.randn(1024, 1024).to(device=device)\n", "weight = torch.randn(1024, 1024).to(device=device)\n", @@ -82,8 +78,7 @@ "metadata": {}, "outputs": [], "source": [ - "os.environ['pytorchsim_functional_mode']=\"0\"\n", - "os.environ['pytorchsim_timing_mode']=\"1\"\n", + "os.environ['TOGSIM_CONFIG']=f\"{base_dir}/tutorial/session1/togsim_configs/togsim_config_timing_only.json\"\n", "\n", "input = torch.randn(1024, 1024).to(device=device)\n", "weight = torch.randn(1024, 1024).to(device=device)\n", @@ -106,7 +101,7 @@ "metadata": {}, "outputs": [], "source": [ - "os.environ['TOGSIM_CONFIG']=f\"{base_dir}/configs/systolic_ws_128x128_c1_simple_noc_tpuv3.json\"\n", + "os.environ['TOGSIM_CONFIG']=f\"{base_dir}/tutorial/session1/togsim_configs/togsim_config_timing_only.json\"\n", "\n", "input = torch.randn(2048, 2048).to(device=device)\n", "weight = torch.randn(2048, 2048).to(device=device)\n", @@ -137,7 +132,7 @@ "metadata": {}, "outputs": [], "source": [ - "os.environ['TOGSIM_CONFIG']=f\"{base_dir}/configs/systolic_ws_128x128_c2_simple_noc_tpuv3.json\"\n", + "os.environ['TOGSIM_CONFIG']=f\"{base_dir}/tutorial/session1/togsim_configs/togsim_config_2_cores.json\"\n", "\n", "input = torch.randn(2048, 2048).to(device=device)\n", "weight = torch.randn(2048, 2048).to(device=device)\n", diff --git a/tutorial/session1/LogAnalysis.ipynb b/tutorial/session1/LogAnalysis.ipynb index 38846216..4f1e17cb 100644 --- a/tutorial/session1/LogAnalysis.ipynb +++ b/tutorial/session1/LogAnalysis.ipynb @@ -18,8 +18,8 @@ "import sys\n", "base_dir = os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim')\n", "sys.path.append(base_dir)\n", - "os.environ['pytorchsim_functional_mode']=\"0\"\n", - "os.environ['pytorchsim_timing_modededededede']=\"1\"" + "os.environ['TOGSIM_CONFIG']=f\"{base_dir}/tutorial/session1/togsim_configs/togsim_config_timing_only.json\"\n", + 
"os.environ['TORCHSIM_DUMP_LOG_PATH']=os.path.join(os.getcwd(), \"togsim_results\")" ] }, { diff --git a/tutorial/session1/Mapping.ipynb b/tutorial/session1/Mapping.ipynb index b29825f7..b02c98fe 100644 --- a/tutorial/session1/Mapping.ipynb +++ b/tutorial/session1/Mapping.ipynb @@ -68,10 +68,7 @@ "source": [ "torch._dynamo.reset()\n", "\n", - "os.environ['TORCHSIM_MAPPING_POLICY']=\"manual\"\n", - "os.environ['TORCHSIM_TILE_M']=\"512\"\n", - "os.environ['TORCHSIM_TILE_N']=\"512\"\n", - "os.environ['TORCHSIM_TILE_K']=\"512\"\n", + "os.environ['TOGSIM_CONFIG']=f\"{base_dir}/tutorial/session1/togsim_configs/togsim_config_external_mapping.json\"\n", "\n", "input = torch.randn(1024, 1024).to(device=device)\n", "weight = torch.randn(1024, 1024).to(device=device)\n", @@ -104,7 +101,7 @@ "source": [ "torch._dynamo.reset()\n", "\n", - "os.environ['TORCHSIM_MAPPING_POLICY']=\"heuristic\"\n", + "os.environ['TOGSIM_CONFIG']=f\"{base_dir}/tutorial/session1/togsim_configs/togsim_config_autotune.json\"\n", "\n", "input = torch.randn(1024, 1024).to(device=device)\n", "weight = torch.randn(1024, 1024).to(device=device)\n", diff --git a/tutorial/session1/togsim_configs/togsim_config.json b/tutorial/session1/togsim_configs/togsim_config.json new file mode 100644 index 00000000..e8e489d9 --- /dev/null +++ b/tutorial/session1/togsim_configs/togsim_config.json @@ -0,0 +1,32 @@ +{ + "num_cores" : 1, + "core_freq_mhz" : 940, + "core_stats_print_period_cycles" : 10000, + "num_systolic_array_per_core" : 2, + + "vpu_num_lanes" : 128, + "vpu_spad_size_kb_per_lane" : 128, + "vpu_vector_length_bits" : 256, + + "dram_type" : "ramulator2", + "dram_freq_mhz" : 940, + "dram_channels": 16, + "dram_req_size_byte": 32, + "dram_num_burst_length" : 2, + "dram_stats_print_period_cycles": 10000, + "ramulator_config_path" : "../configs/ramulator2_configs/HBM2_TPUv3.yaml", + + "icnt_type" : "simple", + "icnt_latency_cycles" : 10, + "icnt_freq_mhz" : 940, + "icnt_injection_ports_per_core" : 16, + + "pytorchsim_functional_mode" : 1, + "pytorchsim_timing_mode" : 1, + + "codegen_mapping_strategy" : "heuristic", + "codegen_external_mapping_file" : "", + "codegen_autotune_max_retry": 10, + "codegen_autotune_template_topk": 4, + "codegen_compiler_optimization" : "all" +} \ No newline at end of file diff --git a/tutorial/session1/togsim_configs/togsim_config_2_cores.json b/tutorial/session1/togsim_configs/togsim_config_2_cores.json new file mode 100644 index 00000000..c50edaa9 --- /dev/null +++ b/tutorial/session1/togsim_configs/togsim_config_2_cores.json @@ -0,0 +1,32 @@ +{ + "num_cores" : 2, + "core_freq_mhz" : 940, + "core_stats_print_period_cycles" : 10000, + "num_systolic_array_per_core" : 2, + + "vpu_num_lanes" : 128, + "vpu_spad_size_kb_per_lane" : 128, + "vpu_vector_length_bits" : 256, + + "dram_type" : "ramulator2", + "dram_freq_mhz" : 940, + "dram_channels": 32, + "dram_req_size_byte": 32, + "dram_num_burst_length" : 2, + "dram_stats_print_period_cycles": 10000, + "ramulator_config_path" : "../configs/ramulator2_configs/HBM2_TPUv3.yaml", + + "icnt_type" : "simple", + "icnt_latency_cycles" : 10, + "icnt_freq_mhz" : 940, + "icnt_injection_ports_per_core" : 16, + + "pytorchsim_functional_mode" : 0, + "pytorchsim_timing_mode" : 1, + + "codegen_mapping_strategy" : "heuristic", + "codegen_external_mapping_file" : "", + "codegen_autotune_max_retry": 10, + "codegen_autotune_template_topk": 4, + "codegen_compiler_optimization" : "all" +} \ No newline at end of file diff --git a/tutorial/session1/togsim_configs/togsim_config_autotune.json 
b/tutorial/session1/togsim_configs/togsim_config_autotune.json new file mode 100644 index 00000000..c9763e92 --- /dev/null +++ b/tutorial/session1/togsim_configs/togsim_config_autotune.json @@ -0,0 +1,32 @@ +{ + "num_cores" : 1, + "core_freq_mhz" : 940, + "core_stats_print_period_cycles" : 10000, + "num_systolic_array_per_core" : 2, + + "vpu_num_lanes" : 128, + "vpu_spad_size_kb_per_lane" : 128, + "vpu_vector_length_bits" : 256, + + "dram_type" : "ramulator2", + "dram_freq_mhz" : 940, + "dram_channels": 16, + "dram_req_size_byte": 32, + "dram_num_burst_length" : 2, + "dram_stats_print_period_cycles": 10000, + "ramulator_config_path" : "../configs/ramulator2_configs/HBM2_TPUv3.yaml", + + "icnt_type" : "simple", + "icnt_latency_cycles" : 10, + "icnt_freq_mhz" : 940, + "icnt_injection_ports_per_core" : 16, + + "pytorchsim_functional_mode" : 1, + "pytorchsim_timing_mode" : 1, + + "codegen_mapping_strategy" : "autotune", + "codegen_external_mapping_file" : "", + "codegen_autotune_max_retry": 10, + "codegen_autotune_template_topk": 4, + "codegen_compiler_optimization" : "all" +} \ No newline at end of file diff --git a/tutorial/session1/togsim_configs/togsim_config_external_mapping.json b/tutorial/session1/togsim_configs/togsim_config_external_mapping.json new file mode 100644 index 00000000..c8ddb0f3 --- /dev/null +++ b/tutorial/session1/togsim_configs/togsim_config_external_mapping.json @@ -0,0 +1,32 @@ +{ + "num_cores" : 1, + "core_freq_mhz" : 940, + "core_stats_print_period_cycles" : 10000, + "num_systolic_array_per_core" : 2, + + "vpu_num_lanes" : 128, + "vpu_spad_size_kb_per_lane" : 128, + "vpu_vector_length_bits" : 256, + + "dram_type" : "ramulator2", + "dram_freq_mhz" : 940, + "dram_channels": 16, + "dram_req_size_byte": 32, + "dram_num_burst_length" : 2, + "dram_stats_print_period_cycles": 10000, + "ramulator_config_path" : "../configs/ramulator2_configs/HBM2_TPUv3.yaml", + + "icnt_type" : "simple", + "icnt_latency_cycles" : 10, + "icnt_freq_mhz" : 940, + "icnt_injection_ports_per_core" : 16, + + "pytorchsim_functional_mode" : 1, + "pytorchsim_timing_mode" : 1, + + "codegen_mapping_strategy" : "external-then-heuristic", + "codegen_external_mapping_file" : "/workspace/PyTorchSim/tutorial/session1/tutorial_external_mapping.json", + "codegen_autotune_max_retry": 10, + "codegen_autotune_template_topk": 4, + "codegen_compiler_optimization" : "all" +} \ No newline at end of file diff --git a/tutorial/session1/togsim_configs/togsim_config_functional_only.json b/tutorial/session1/togsim_configs/togsim_config_functional_only.json new file mode 100644 index 00000000..53072307 --- /dev/null +++ b/tutorial/session1/togsim_configs/togsim_config_functional_only.json @@ -0,0 +1,32 @@ +{ + "num_cores" : 1, + "core_freq_mhz" : 940, + "core_stats_print_period_cycles" : 10000, + "num_systolic_array_per_core" : 2, + + "vpu_num_lanes" : 128, + "vpu_spad_size_kb_per_lane" : 128, + "vpu_vector_length_bits" : 256, + + "dram_type" : "ramulator2", + "dram_freq_mhz" : 940, + "dram_channels": 16, + "dram_req_size_byte": 32, + "dram_num_burst_length" : 2, + "dram_stats_print_period_cycles": 10000, + "ramulator_config_path" : "../configs/ramulator2_configs/HBM2_TPUv3.yaml", + + "icnt_type" : "simple", + "icnt_latency_cycles" : 10, + "icnt_freq_mhz" : 940, + "icnt_injection_ports_per_core" : 16, + + "pytorchsim_functional_mode" : 1, + "pytorchsim_timing_mode" : 0, + + "codegen_mapping_strategy" : "heuristic", + "codegen_external_mapping_file" : "", + "codegen_autotune_max_retry": 10, + 
"codegen_autotune_template_topk": 4, + "codegen_compiler_optimization" : "all" +} \ No newline at end of file diff --git a/tutorial/session1/togsim_configs/togsim_config_no_compiler_optimization.json b/tutorial/session1/togsim_configs/togsim_config_no_compiler_optimization.json new file mode 100644 index 00000000..e2b9c8c8 --- /dev/null +++ b/tutorial/session1/togsim_configs/togsim_config_no_compiler_optimization.json @@ -0,0 +1,32 @@ +{ + "num_cores" : 1, + "core_freq_mhz" : 940, + "core_stats_print_period_cycles" : 10000, + "num_systolic_array_per_core" : 2, + + "vpu_num_lanes" : 128, + "vpu_spad_size_kb_per_lane" : 128, + "vpu_vector_length_bits" : 256, + + "dram_type" : "ramulator2", + "dram_freq_mhz" : 940, + "dram_channels": 16, + "dram_req_size_byte": 32, + "dram_num_burst_length" : 2, + "dram_stats_print_period_cycles": 10000, + "ramulator_config_path" : "../configs/ramulator2_configs/HBM2_TPUv3.yaml", + + "icnt_type" : "simple", + "icnt_latency_cycles" : 10, + "icnt_freq_mhz" : 940, + "icnt_injection_ports_per_core" : 16, + + "pytorchsim_functional_mode" : 0, + "pytorchsim_timing_mode" : 1, + + "codegen_mapping_strategy" : "heuristic", + "codegen_external_mapping_file" : "", + "codegen_autotune_max_retry": 10, + "codegen_autotune_template_topk": 4, + "codegen_compiler_optimization" : "none" +} \ No newline at end of file diff --git a/tutorial/session1/togsim_configs/togsim_config_timing_only.json b/tutorial/session1/togsim_configs/togsim_config_timing_only.json new file mode 100644 index 00000000..0b846bbd --- /dev/null +++ b/tutorial/session1/togsim_configs/togsim_config_timing_only.json @@ -0,0 +1,32 @@ +{ + "num_cores" : 1, + "core_freq_mhz" : 940, + "core_stats_print_period_cycles" : 10000, + "num_systolic_array_per_core" : 2, + + "vpu_num_lanes" : 128, + "vpu_spad_size_kb_per_lane" : 128, + "vpu_vector_length_bits" : 256, + + "dram_type" : "ramulator2", + "dram_freq_mhz" : 940, + "dram_channels": 16, + "dram_req_size_byte": 32, + "dram_num_burst_length" : 2, + "dram_stats_print_period_cycles": 10000, + "ramulator_config_path" : "../configs/ramulator2_configs/HBM2_TPUv3.yaml", + + "icnt_type" : "simple", + "icnt_latency_cycles" : 10, + "icnt_freq_mhz" : 940, + "icnt_injection_ports_per_core" : 16, + + "pytorchsim_functional_mode" : 0, + "pytorchsim_timing_mode" : 1, + + "codegen_mapping_strategy" : "heuristic", + "codegen_external_mapping_file" : "", + "codegen_autotune_max_retry": 10, + "codegen_autotune_template_topk": 4, + "codegen_compiler_optimization" : "all" +} \ No newline at end of file From 7c31a63413df7269915b186ae62f69609c0274af Mon Sep 17 00:00:00 2001 From: Wonhyuk Yang Date: Wed, 3 Dec 2025 04:08:39 +0000 Subject: [PATCH 17/21] [Tutorial] Handon fix --- tutorial/session2/Hands_on.ipynb | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/tutorial/session2/Hands_on.ipynb b/tutorial/session2/Hands_on.ipynb index ef66a5f6..33ec1a28 100644 --- a/tutorial/session2/Hands_on.ipynb +++ b/tutorial/session2/Hands_on.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "89aac974-97ea-46f2-b856-7b37c0a23add", "metadata": {}, "outputs": [ @@ -31,7 +31,8 @@ "import torch\n", "import torch._dynamo\n", "import torch.utils.cpp_extension\n", - "sys.path.append(os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim'))\n", + "base_dir = os.environ.get('TORCHSIM_DIR', default='/workspace/PyTorchSim')\n", + "sys.path.append(base_dir)\n", "\n", "from Scheduler.scheduler import 
PyTorchSimRunner\n", "module = PyTorchSimRunner.setup_device()\n", @@ -58,7 +59,7 @@ " x = torch.randn(size).to(device=device)\n", " opt_fn = torch.compile(dynamic=False)(exponent2)\n", " res = opt_fn(x)\n", - " out = exponent(x.cpu())\n", + " out = exponent2(x.cpu())\n", " test_result(\"exponent2\", res, out)" ] }, @@ -68,6 +69,13 @@ "id": "42d509f3-d955-4149-9f0f-bd0f3d0620f9", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2025-12-03 02:02:14,679] [0/0] torch._inductor.debug: [WARNING] model___9 debug trace: /tmp/torchinductor_root/uu/cuumxtbdv4ukzpymchmrda2exohouwcdybawmj2v7jog4vbvoycf.debug\n" + ] + }, { "name": "stdout", "output_type": "stream", @@ -76,7 +84,7 @@ "[Gem5] Gem5 is running... \n", "[Spike] Running Spike simulator\n", "[TOGSim] TOGSim is running.. \n", - "[TOGSim] Simulation of \"/tmp/torchinductor/tmp/wefbdnuiezd/tile_graph.onnx\" is stored to \"/tmp/torchinductor/tmp/wefbdnuiezd/togsim_result/0\"\n", + "[TOGSim] Simulation log is stored to \"/workspace/PyTorchSim/togsim_results/20251203_020218.log\"\n", "------------------\n", "|exp2 Test Passed|\n", "------------------\n" @@ -96,7 +104,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "5bfdf22f-e749-41a5-a2cf-dcbb630bfb83", "metadata": {}, "outputs": [ @@ -230,7 +238,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "base", "language": "python", "name": "python3" }, From 7f7da299776ff2b437e5a830201ac3cac1f05271 Mon Sep 17 00:00:00 2001 From: Wonhyuk Yang Date: Thu, 4 Dec 2025 12:27:58 +0000 Subject: [PATCH 18/21] [CI] Add a CI for building tutorial images --- .github/workflows/docker-tutorial-image.yml | 35 +++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 .github/workflows/docker-tutorial-image.yml diff --git a/.github/workflows/docker-tutorial-image.yml b/.github/workflows/docker-tutorial-image.yml new file mode 100644 index 00000000..c7d3a2ca --- /dev/null +++ b/.github/workflows/docker-tutorial-image.yml @@ -0,0 +1,35 @@ +name: Docker image for tutorial + +on: + push: + branches: [ "tutorial" ] + +jobs: + build: + runs-on: self-hosted + + permissions: + contents: read + packages: write + + steps: + # Step 1: Checkout the repository + - name: Checkout Code + uses: actions/checkout@v4 + + # Step 2: Log in to GitHub Container Registry + - name: Log in to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + # Step 3: Build and Push Docker Image + - name: Build and Push Docker Image + uses: docker/build-push-action@v4 + with: + context: . 
+ file: ./Dockerfile.ksc2025 + push: true + tags: ghcr.io/psal-postech/torchsim_ksc2025:latest From 55097363fe579ac1203629e205c6b7a90de6787d Mon Sep 17 00:00:00 2001 From: Wonhyuk Yang Date: Fri, 5 Dec 2025 06:36:44 +0000 Subject: [PATCH 19/21] [Refactor] Remove deprecated python code --- .gitignore | 6 ++++ test_extension_backend.py | 58 --------------------------------------- 2 files changed, 6 insertions(+), 58 deletions(-) delete mode 100644 test_extension_backend.py diff --git a/.gitignore b/.gitignore index 9decced5..b42d5f6b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,9 @@ __pycache__/ TOGSim/build/ .vscode +*.txt +*.ipynb_checkpoints +output +togsim_results/* +outputs/* +experiments/artifact/logs/* \ No newline at end of file diff --git a/test_extension_backend.py b/test_extension_backend.py deleted file mode 100644 index 5e6427ef..00000000 --- a/test_extension_backend.py +++ /dev/null @@ -1,58 +0,0 @@ -import torch._dynamo -import torch.utils.cpp_extension -from tests.test_add import test_vectoradd, test_vector_scalar_add -from tests.test_reduce import test_reduce_sum -from tests.test_transpose2D import test_Transpose2D, test_Transpose2D_2 -from tests.test_transpose3D import test_Transpose3D_1, test_Transpose3D_2, test_Transpose3D_3 -from tests.test_view3D_2D import test_view3D_2D -from tests.test_softmax import test_softmax -from tests.test_batchnorm import test_BatchNorm -from tests.test_layernorm import test_LayerNorm -from tests.test_conv2d import test_conv2d -from tests.test_matmul import test_matmul -from tests.test_bmm import test_BMM -from tests.test_cnn import test_CNN -from tests.test_transformer import test_EncoderBlock -from tests.test_resnet import test_resnet -from tests.test_mlp import test_mlp, test_mlp_inf -from tests.MoE.test_moe import test_moe -from tests.test_pool import test_avgpool, test_maxpool -from tests.Fusion.test_addmm_residual import test_addmm_residual -from tests.Fusion.test_matmul_scalar import test_matmul_scalar -from tests.Fusion.test_matmul_activation import test_matmul_activation - -if __name__ == "__main__": - from Scheduler.scheduler import PyTorchSimRunner - module = PyTorchSimRunner.setup_device() - device = module.custom_device() - #test_vectoradd(device, (47, 10)) - #test_vector_scalar_add(device, (10, 10)) - #test_reduce_sum(device, (32, 32), 1, keepdim=True) - #test_reduce_sum(device, (32, 32), 0, keepdim=True) - #test_reduce_sum(device, (512, 512), 1, keepdim=True) - #test_reduce_sum(device, (512, 512), 0, keepdim=True) - #test_Transpose2D(device, [64, 156]) - #test_Transpose2D_2(device, [16, 64]) - #test_Transpose3D_1(device, [62, 34, 256]) - #test_Transpose3D_2(device, [62, 34, 256]) - #test_Transpose3D_3(device, [62, 34, 256]) - #test_view3D_2D(device) - test_maxpool(device) - #test_avgpool(device) - #test_softmax(device, (256, 256), dim=1) - #test_BatchNorm(device) - #test_LayerNorm(device, (64, 128)) - #test_conv2d(device) - #test_matmul(device, 33, 45, 68) - #test_BMM(device) - #test_CNN(device) - #test_EncoderBlock(device) - #test_resnet(device) - #test_mlp(device) - #test_mlp_inf(device, batch_size=64, input_size=256, hidden_size=512, output_size=256, sparsity=0.97) - - # # Fusion Test - #test_matmul_scalar(device) - #test_matmul_activation(device, batch_size=32, input_size=32, output_size=32, activation_fn="relu") - #test_matmul_activation(device, batch_size=32, input_size=32, output_size=32, activation_fn="sigmoid") - #test_addmm_residual(device) From 56c84d1812cd516c4654cf040f32f46205aee4f1 Mon Sep 17 00:00:00 2001 
From: Wonhyuk Yang Date: Fri, 5 Dec 2025 06:48:26 +0000 Subject: [PATCH 20/21] [Fix] Correct an incorrectly changed string literal --- PyTorchSimFrontend/mlir/mlir_codegen_backend.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/PyTorchSimFrontend/mlir/mlir_codegen_backend.py b/PyTorchSimFrontend/mlir/mlir_codegen_backend.py index 3f055791..6650f429 100644 --- a/PyTorchSimFrontend/mlir/mlir_codegen_backend.py +++ b/PyTorchSimFrontend/mlir/mlir_codegen_backend.py @@ -1725,7 +1725,7 @@ def get_dma_info(self, name, index, broadcast=True, store_reduction=False, buffe """ # Use loads as default if buffer is None: - buffer = self.applys if "outputs" not in str(index) else self.dma_loads + buffer = self.applys if "tmp" not in str(index) else self.dma_loads # TODO. kg_tile_desc = self.kernel_group.tile_desc @@ -1736,7 +1736,7 @@ def get_dma_info(self, name, index, broadcast=True, store_reduction=False, buffe total_dims = [int(str(i)[5:]) for i in self.itervars] local_tile_desc = mlir_common.MLIRMultiDimTile([1], self.vector_lane) local_dims.sort() # Assume that smaller index is placed in the outer loop - indirect_dims = [f"{i}" for i in index.free_symbols if "outputs" in str(i)] + indirect_dims = [f"{i}" for i in index.free_symbols if "tmp" in str(i)] for indirect_dim in indirect_dims: index = index.replace(sympy.Symbol(indirect_dim), 0) @@ -1992,7 +1992,7 @@ def get_mask(self): return mask_shape, mask_var def convert_indirect_indexing(self, index :sympy.Expr): - if "outputs" not in str(index): + if "tmp" not in str(index): return index, None # Note: In case of indirect indexing, dimensions should be divisible by tile size @@ -2003,7 +2003,7 @@ def convert_indirect_indexing(self, index :sympy.Expr): raise mlir_common.RecompileSignal(f"Indirect access (tile size {self.kernel_group.tile_desc.get_tile_size()} is not divisible by {self.ranges})") # Process start - indirect_dims = [str(dim) for dim in index.free_symbols if "outputs" in str(dim)] + indirect_dims = [str(dim) for dim in index.free_symbols if "tmp" in str(dim)] indirect_dims.sort() first_dim = indirect_dims[0] spad_vars = dict() @@ -2051,7 +2051,7 @@ def convert_indirect_indexing(self, index :sympy.Expr): # Apply stride for arg in index.args: - if "outputs" not in str(arg): + if "tmp" not in str(arg): continue if arg.is_Mul and arg.args[0].is_number: coeff_dtype = self.var_info[spad_vars[str(arg.args[1])]][1] From b2255418f40efa50047d2a71a7961aa9e4410812 Mon Sep 17 00:00:00 2001 From: Wonhyuk Yang Date: Fri, 5 Dec 2025 07:07:24 +0000 Subject: [PATCH 21/21] [Config] Remove unused config files and update the remaining configs --- configs/heterogeneous_c2_simple_noc.json | 13 +++++++++++- .../systolic_ws_8x8_c1_12G_simple_noc.json | 17 ---------------- .../systolic_ws_8x8_c1_24G_simple_noc.json | 17 ---------------- .../systolic_ws_8x8_c1_48G_simple_noc.json | 17 ---------------- configs/systolic_ws_8x8_c1_booksim.json | 20 +++++++++++++++---- configs/systolic_ws_8x8_c1_simple_noc.json | 17 ++++++++++++++-- .../systolic_ws_8x8_c2_12G_simple_noc.json | 18 ----------------- .../systolic_ws_8x8_c2_24G_simple_noc.json | 17 ---------------- .../systolic_ws_8x8_c2_48G_simple_noc.json | 17 ---------------- 9 files changed, 43 insertions(+), 110 deletions(-) delete mode 100644 configs/systolic_ws_8x8_c1_12G_simple_noc.json delete mode 100644 configs/systolic_ws_8x8_c1_24G_simple_noc.json delete mode 100644 configs/systolic_ws_8x8_c1_48G_simple_noc.json delete mode 100644 configs/systolic_ws_8x8_c2_12G_simple_noc.json delete mode 100644 
configs/systolic_ws_8x8_c2_24G_simple_noc.json delete mode 100644 configs/systolic_ws_8x8_c2_48G_simple_noc.json diff --git a/configs/heterogeneous_c2_simple_noc.json b/configs/heterogeneous_c2_simple_noc.json index 293fe385..a68f38c2 100644 --- a/configs/heterogeneous_c2_simple_noc.json +++ b/configs/heterogeneous_c2_simple_noc.json @@ -4,10 +4,15 @@ "num_cores" : 2, "core_freq_mhz" : 940, "core_stats_print_period_cycles" : 10000, + "num_stonne_per_core" : 8, "num_stonne_port" : 64, "num_systolic_array_per_core" : 2, + "vpu_num_lanes" : 128, + "vpu_spad_size_kb_per_lane" : 128, + "vpu_vector_length_bits" : 256, + "dram_type" : "ramulator2", "dram_freq_mhz" : 940, "dram_channels": 16, @@ -25,5 +30,11 @@ "partition": { "core_0":0, "core_1":1 - } + }, + + "codegen_mapping_strategy" : "autotune", + "codegen_external_mapping_file" : "", + "codegen_autotune_max_retry": 10, + "codegen_autotune_template_topk": 4, + "codegen_compiler_optimization" : "all" } \ No newline at end of file diff --git a/configs/systolic_ws_8x8_c1_12G_simple_noc.json b/configs/systolic_ws_8x8_c1_12G_simple_noc.json deleted file mode 100644 index 65236e3f..00000000 --- a/configs/systolic_ws_8x8_c1_12G_simple_noc.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "num_cores" : 1, - "core_freq_mhz" : 1000, - "core_stats_print_period_cycles" : 100000, - - "dram_type" : "ramulator2", - "dram_freq_mhz" :800, - "dram_channels": 1, - "dram_req_size_byte": 64, - "dram_num_burst_length" : 4, - "dram_stats_print_period_cycles": 100000, - "ramulator_config_path" : "../configs/ramulator2_configs/DDR4.yaml", - - "icnt_type" : "simple", - "icnt_latency_cycles" : 10, - "icnt_freq_mhz" : 1000 -} \ No newline at end of file diff --git a/configs/systolic_ws_8x8_c1_24G_simple_noc.json b/configs/systolic_ws_8x8_c1_24G_simple_noc.json deleted file mode 100644 index a1fe4d12..00000000 --- a/configs/systolic_ws_8x8_c1_24G_simple_noc.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "num_cores" : 1, - "core_freq_mhz" : 1000, - "core_stats_print_period_cycles" : 100000, - - "dram_type" : "ramulator2", - "dram_freq_mhz" :800, - "dram_channels": 2, - "dram_req_size_byte": 64, - "dram_num_burst_length" : 4, - "dram_stats_print_period_cycles": 100000, - "ramulator_config_path" : "../configs/ramulator2_configs/DDR4.yaml", - - "icnt_type" : "simple", - "icnt_latency_cycles" : 10, - "icnt_freq_mhz" : 1000 -} \ No newline at end of file diff --git a/configs/systolic_ws_8x8_c1_48G_simple_noc.json b/configs/systolic_ws_8x8_c1_48G_simple_noc.json deleted file mode 100644 index c1431f6d..00000000 --- a/configs/systolic_ws_8x8_c1_48G_simple_noc.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "num_cores" : 1, - "core_freq_mhz" : 1000, - "core_stats_print_period_cycles" : 100000, - - "dram_type" : "ramulator2", - "dram_freq_mhz" :800, - "dram_channels": 4, - "dram_req_size_byte": 64, - "dram_num_burst_length" : 4, - "dram_stats_print_period_cycles": 100000, - "ramulator_config_path" : "../configs/ramulator2_configs/DDR4.yaml", - - "icnt_type" : "simple", - "icnt_latency_cycles" : 10, - "icnt_freq_mhz" : 1000 -} \ No newline at end of file diff --git a/configs/systolic_ws_8x8_c1_booksim.json b/configs/systolic_ws_8x8_c1_booksim.json index 0f42812d..851664e6 100644 --- a/configs/systolic_ws_8x8_c1_booksim.json +++ b/configs/systolic_ws_8x8_c1_booksim.json @@ -1,17 +1,29 @@ { "num_cores" : 1, - "core_freq_mhz" : 1000, + "core_freq_mhz" : 800, "core_stats_print_period_cycles" : 100000, + "vpu_num_lanes" : 8, + "vpu_spad_size_kb_per_lane" : 32, + "vpu_vector_length_bits" : 256, + "dram_type" 
: "ramulator2", "dram_freq_mhz" :800, "dram_channels": 1, "dram_req_size_byte": 64, "dram_num_burst_length" : 4, - "dram_stats_print_period_cycless": 100000, + "dram_stats_print_period_cycles": 100000, "ramulator_config_path" : "../configs/ramulator2_configs/DDR4.yaml", "icnt_type" : "booksim2", - "icnt_latency_cycles" : 10, - "icnt_freq_mhz" : 1000 + "icnt_freq_mhz" : 800, + + "pytorchsim_functional_mode" : 1, + "pytorchsim_timing_mode" : 1, + + "codegen_mapping_strategy" : "autotune", + "codegen_external_mapping_file" : "", + "codegen_autotune_max_retry": 10, + "codegen_autotune_template_topk": 4, + "codegen_compiler_optimization" : "all" } \ No newline at end of file diff --git a/configs/systolic_ws_8x8_c1_simple_noc.json b/configs/systolic_ws_8x8_c1_simple_noc.json index 5bb742bd..2eb7e183 100644 --- a/configs/systolic_ws_8x8_c1_simple_noc.json +++ b/configs/systolic_ws_8x8_c1_simple_noc.json @@ -1,8 +1,12 @@ { "num_cores" : 1, - "core_freq_mhz" : 1000, + "core_freq_mhz" : 800, "core_stats_print_period_cycles" : 100000, + "vpu_num_lanes" : 8, + "vpu_spad_size_kb_per_lane" : 32, + "vpu_vector_length_bits" : 256, + "dram_type" : "ramulator2", "dram_freq_mhz" :800, "dram_channels": 1, @@ -13,5 +17,14 @@ "icnt_type" : "simple", "icnt_latency_cycles" : 10, - "icnt_freq_mhz" : 1000 + "icnt_freq_mhz" : 800, + + "pytorchsim_functional_mode" : 1, + "pytorchsim_timing_mode" : 1, + + "codegen_mapping_strategy" : "autotune", + "codegen_external_mapping_file" : "", + "codegen_autotune_max_retry": 10, + "codegen_autotune_template_topk": 4, + "codegen_compiler_optimization" : "all" } \ No newline at end of file diff --git a/configs/systolic_ws_8x8_c2_12G_simple_noc.json b/configs/systolic_ws_8x8_c2_12G_simple_noc.json deleted file mode 100644 index 33fb2e7f..00000000 --- a/configs/systolic_ws_8x8_c2_12G_simple_noc.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "core_type" : ["ws_mesh","ws_mesh"], - "num_cores" : 2, - "core_freq_mhz" : 1000, - "core_stats_print_period_cycles" : 100000, - - "dram_type" : "ramulator2", - "dram_freq_mhz" :800, - "dram_channels": 1, - "dram_req_size_byte": 64, - "dram_num_burst_length" : 4, - "dram_stats_print_period_cycless": 100000, - "ramulator_config_path" : "../configs/ramulator2_configs/DDR4.yaml", - - "icnt_type" : "simple", - "icnt_latency_cycles" : 10, - "icnt_freq_mhz" : 1000 -} \ No newline at end of file diff --git a/configs/systolic_ws_8x8_c2_24G_simple_noc.json b/configs/systolic_ws_8x8_c2_24G_simple_noc.json deleted file mode 100644 index 9c4cbb5c..00000000 --- a/configs/systolic_ws_8x8_c2_24G_simple_noc.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "num_cores" : 2, - "core_freq_mhz" : 1000, - "core_stats_print_period_cycles" : 100000, - - "dram_type" : "ramulator2", - "dram_freq_mhz" :800, - "dram_channels": 2, - "dram_req_size_byte": 64, - "dram_num_burst_length" : 4, - "dram_stats_print_period_cycles": 100000, - "ramulator_config_path" : "../configs/ramulator2_configs/DDR4.yaml", - - "icnt_type" : "simple", - "icnt_latency_cycle" : 10, - "icnt_freq_mhz" : 1000 -} \ No newline at end of file diff --git a/configs/systolic_ws_8x8_c2_48G_simple_noc.json b/configs/systolic_ws_8x8_c2_48G_simple_noc.json deleted file mode 100644 index 143703aa..00000000 --- a/configs/systolic_ws_8x8_c2_48G_simple_noc.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "num_cores" : 2, - "core_freq_mhz" : 1000, - "core_stats_print_period_cycles" : 100000, - - "dram_type" : "ramulator2", - "dram_freq_mhz" :800, - "dram_channels": 4, - "dram_req_size_byte": 64, - "dram_num_burst_length" : 4, - 
"dram_stats_print_period_cycless": 100000, - "ramulator_config_path" : "../configs/ramulator2_configs/DDR4.yaml", - - "icnt_type" : "simple", - "icnt_latency_cycles" : 10, - "icnt_freq_mhz" : 1000 -} \ No newline at end of file