import sys, os
import random

# License header written verbatim as the first entry of the generated Java
# file (see Generator.generate). It is emitted output, not a comment of this
# script, so its text must not be reformatted.
COPYRIGHT = \
"""/*
 * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */"""


# This class is used to simplify the IR rule constraints
class IRRange:
    """Closed integer interval ``[lo, hi]`` constraining one flag of an IR rule.

    A bound of ``None`` means "unbounded on that side". A range whose two
    bounds are both ``None`` places no restriction at all (see ``is_int``).
    """

    def __init__(self, lo, hi):
        self.lo = lo
        self.hi = hi

    @staticmethod
    def lower_bound(lo):
        """Return the range with lower bound ``lo`` and no upper bound."""
        return IRRange(lo, None)

    @staticmethod
    def upper_bound(hi):
        """Return the range with upper bound ``hi`` and no lower bound."""
        return IRRange(None, hi)

    def __str__(self):
        if self.lo is None and self.hi is None:
            return "int"
        lo = "[" if self.lo is None else f"[{self.lo}"
        hi = "]" if self.hi is None else f"{self.hi}]"
        return f"{lo}..{hi}"

    def __repr__(self):
        return str(self)

    def intersect(self, other):
        """Return a new range satisfying both ``self`` and ``other``.

        The result may be empty (``lo > hi``); callers check ``is_empty``.
        """
        # An absent bound defers to the other range's bound; two present
        # bounds keep the tighter one.
        if self.lo is None:
            lo = other.lo
        elif other.lo is None:
            lo = self.lo
        else:
            lo = max(self.lo, other.lo)
        if self.hi is None:
            hi = other.hi
        elif other.hi is None:
            hi = self.hi
        else:
            hi = min(self.hi, other.hi)
        return IRRange(lo, hi)

    def is_empty(self):
        """Return True when both bounds are set and no value lies between them."""
        return self.lo is not None and self.hi is not None and self.lo > self.hi

    def is_int(self):
        """Return True when the range is unbounded on both sides (no constraint)."""
        return self.lo is None and self.hi is None

    def get_constraints(self):
        """Return the range as a list of constraint strings.

        A single-value range yields just that value (e.g. ``["8"]``), otherwise
        up to two entries of the form ``">= lo"`` and ``"<= hi"``. Must only be
        called on a range that is neither empty nor unconstrained.
        """
        assert not self.is_empty() and not self.is_int()
        if self.lo is not None and self.lo == self.hi:
            return [f"{self.lo}"]
        constraints = []
        if self.lo is not None:
            constraints.append(f">= {self.lo}")
        if self.hi is not None:
            constraints.append(f"<= {self.hi}")
        return constraints

class IRBool:
    """Set of boolean values a flag may take in an IR rule.

    ``t`` tells whether ``true`` is allowed and ``f`` whether ``false`` is
    allowed. The method names mirror ``IRRange`` so that both kinds of
    constraint can be handled by the same code.
    """

    def __init__(self, t, f):
        self.t = t
        self.f = f

    @staticmethod
    def makeTrue():
        """Return the constraint that allows only ``true``."""
        return IRBool(True, False)

    @staticmethod
    def makeFalse():
        """Return the constraint that allows only ``false``."""
        return IRBool(False, True)

    def __str__(self):
        allowed = []
        if self.t:
            allowed.append("true")
        if self.f:
            allowed.append("false")
        return "[" + ", ".join(allowed) + "]"

    def __repr__(self):
        return str(self)

    def intersect(self, other):
        """Return the values allowed by both ``self`` and ``other``."""
        return IRBool(self.t and other.t, self.f and other.f)

    def is_empty(self):
        """Return True when neither value is allowed."""
        return (not self.t and not self.f)

    def is_int(self):
        """Return True when both values are allowed, i.e. no constraint.

        Named after ``IRRange.is_int`` so both classes share one interface.
        """
        return (self.t and self.f)

    def get_constraints(self):
        """Return the single allowed value as a one-element list of strings.

        Must only be called when exactly one of the two values is allowed.
        """
        assert not self.is_empty() and not self.is_int()
        constraints = []
        if self.t:
            constraints.append("true")
        if self.f:
            constraints.append("false")
        return constraints

#r1 = IRRange.lower_bound(4)
#print(r1)
#r2 = IRRange.upper_bound(100)
#print(r2)
#print(r1.intersect(r2))
#print(r2.intersect(r1))
#r3 = IRRange.lower_bound(50)
#print(r3)
#print(r1.intersect(r3))
#print(r3.intersect(r1))
#
#print([r1, r2, r3])
#
#r4 = IRRange.lower_bound(70)
#r5 = IRRange.upper_bound(30)

#b1 = IRBool(False, False)
#b2 = IRBool(True, False)
#b3 = IRBool(False, True)
#b4 = IRBool(True, True)
#print(b1, b2, b3, b4)
#print("i", b1.intersect(b2))
#print("i", b4.intersect(b2))
#print("i", b3.intersect(b4))
#print("i", b4.intersect(b3))

class Platform:
    """Plain record describing one CPU configuration an IR rule targets."""

    def __init__(self, name, cpu_features, vector_width):
        # Vector width is stored as given; callers in this file compare it
        # against byte offsets and divide it by element sizes.
        self.vector_width = vector_width
        # Sequence of strings emitted into the applyIfCPUFeature clause.
        self.cpu_features = cpu_features
        # Human-readable label used in generated comments.
        self.name = name

class PlatformIRRule:
    """Collects nodes and flag constraints and renders them as one ``@IR`` rule.

    ``platform`` must expose ``cpu_features``: a flat list of strings that is
    emitted into the ``applyIfCPUFeature`` clause. Constraint objects must
    provide ``intersect``, ``is_empty``, ``is_int`` and ``get_constraints``.
    """

    def __init__(self, platform):
        self.platform = platform
        self.nodes = []
        self.pre_constraints = []
        self.constraints = []
        # False: emit "counts = {node, "> 0", ...}"; True: emit "failOn = {...}".
        self.positive_forbids_nodes = False

    def expect_no_nodes(self):
        """Make the rule require that none of the nodes appear (``failOn``)."""
        self.positive_forbids_nodes = True

    def add_node(self, node):
        """Append an IR node expression to the rule."""
        self.nodes.append(node)

    def add_constraint(self, variable, ir_range):
        """Add a constraint on flag ``variable``; emitted after pre-constraints."""
        self.constraints.append((variable, ir_range))

    def add_pre_constraint(self, variable, ir_range):
        """Add a constraint on flag ``variable`` that is emitted first."""
        self.pre_constraints.append((variable, ir_range))

    def simplify(self, constraints):
        """Merge constraints so that each variable appears exactly once.

        Returns ``(variable, constraint)`` pairs in order of each variable's
        first appearance; repeated variables are intersected.
        """
        merged = {}
        for variable, constraint in constraints:
            if variable in merged:
                constraint = constraint.intersect(merged[variable])
            merged[variable] = constraint
        return list(merged.items())

    def positive_constraints(self):
        """Return the rendered constraints of this rule, or None if impossible."""
        return self.collect_constraints(self.pre_constraints + self.constraints)

    def collect_constraints(self, constraints):
        """Render ``constraints`` as ``(variable, condition_string)`` pairs.

        Returns None when any variable's merged constraint is empty (the rule
        can never apply). Variables without an effective restriction are
        omitted from the result.
        """
        rendered = []
        for variable, constraint in self.simplify(constraints):
            if constraint.is_empty():
                return None  # impossible rule
            if not constraint.is_int():
                # non-trivial constraint: one entry per condition string
                rendered.extend((variable, c) for c in constraint.get_constraints())
        return rendered

    def generate(self, lines):
        """Append the Java source of this rule to ``lines``.

        Emits either an ``@IR(...)`` annotation or, when the constraints are
        contradictory, a single explanatory comment line.
        """
        positive_c = self.positive_constraints()
        if positive_c is None:
            lines.append("    //   No positive IR rule: conditions impossible.")
            return

        # Collect the attributes without separators first, so the annotation
        # is always closed on whichever attribute happens to be last. This
        # also covers a platform without CPU features, for which the
        # applyIfCPUFeature clause is omitted.
        if self.positive_forbids_nodes:
            ns = ", ".join(self.nodes)
            attributes = [f"    @IR(failOn = {{{ns}}}"]
        else:
            ns = ", ".join([f"{n}, \"> 0\"" for n in self.nodes])
            attributes = [f"    @IR(counts = {{{ns}}}"]

        if len(positive_c) > 0:
            is_and = "And" if len(positive_c) > 1 else ""
            conditions = ", ".join([f"\"{a1}\", \"{a2}\"" for (a1, a2) in positive_c])
            attributes.append(f"        applyIf{is_and} = {{{conditions}}}")

        cpu = [f"\"{c}\"" for c in self.platform.cpu_features]
        if len(cpu) > 0:
            # The list holds (feature, value) pairs, so more than two entries
            # means more than one condition.
            is_and = "And" if len(cpu) > 2 else ""
            cpu = ", ".join(cpu)
            attributes.append(f"        applyIfCPUFeature{is_and} = {{{cpu}}}")

        for attribute in attributes[:-1]:
            lines.append(attribute + ",")
        lines.append(attributes[-1] + ")")

class Type:
    """A Java primitive element type together with the operation tested on it.

    Attributes:
        name: Java type name, e.g. ``"int"``.
        size: element size used for byte-offset and vector-width arithmetic.
        factor: constant the generated kernel combines each element with.
        operator: Java binary operator used in the generated kernel.
        ir_op: name of the ``IRNode`` constant expected for that operator.
        letter: upper-cased first character of ``name``.
    """

    def __init__(self, name, size, factor, operator, ir_op):
        self.name = name
        self.size = size
        self.factor = factor
        self.operator = operator
        self.ir_op = ir_op
        self.letter = name[0].upper()

    def platforms(self):
        """Return the list of ``Platform`` objects to emit IR rules for.

        The x86 entries depend on the element type; the two ARM entries
        (asimd, sve) are appended for every type.

        Raises:
            AssertionError: if ``name`` is not one of the supported types.
                Raised explicitly so the check is not stripped by ``-O``.
        """
        if self.name in ("byte", "char", "short"):
            p = [
                Platform("sse4.1 to avx", ["sse4.1", "true", "avx2", "false"], 16),
                Platform("avx2 to avx512 without avx512bw", ["avx2", "true", "avx512bw", "false"], 32),
                Platform("avx512bw", ["avx512bw", "true"], 64),
            ]
        elif self.name in ("float", "double"):
            p = [
                Platform("sse4.1", ["sse4.1", "true", "avx", "false"], 16),
                Platform("avx and avx2", ["avx", "true", "avx512", "false"], 32),
                Platform("avx512", ["avx512", "true"], 64),
            ]
        elif self.name in ("int", "long"):
            p = [
                Platform("sse4.1 to avx", ["sse4.1", "true", "avx2", "false"], 16),
                Platform("avx2", ["avx2", "true", "avx512", "false"], 32),
                Platform("avx512", ["avx512", "true"], 64),
            ]
        else:
            raise AssertionError(f"type not implemented: {self.name}")
        p.append(Platform("asimd", ["asimd", "true", "sve", "false"], 16))
        p.append(Platform("sve", ["sve", "true"], 256))
        return p

class Test:
    """One generated test case: an element type paired with an access offset.

    ``name`` is the capitalized type name, then ``M`` for a negative or ``P``
    for a non-negative offset, then the absolute offset (e.g. ``IntM3``). The
    three accessors derive the Java identifiers from it.
    """

    def __init__(self, t, offset):
        sign = "M" if offset < 0 else "P"
        self.name = f"{t.name.capitalize()}{sign}{abs(offset)}"
        self.t = t
        self.offset = offset

    def gold(self):
        """Name of the static array holding the reference result."""
        return "gold" + self.name

    def run(self):
        """Name of the generated ``@Run`` method."""
        return "run" + self.name

    def test(self):
        """Name of the generated ``@Test`` method."""
        return "test" + self.name

class TestScenario:
    """A named jtreg scenario: VM flags to add and ``@requires`` clauses."""

    def __init__(self, name, flags, requires):
        self.name, self.flags, self.requires = name, flags, requires

    def copy(self):
        """Return a scenario with equal content but independent lists."""
        name = str(self.name)
        flags = [*self.flags]
        requires = [*self.requires]
        return TestScenario(name, flags, requires)

    def format_flags(self):
        """Return the flags as comma-separated, double-quoted Java literals."""
        return ", ".join('"' + f"{flag}" + '"' for flag in self.flags)

class Generator:
    def __init__(self):
        """Set up the element types, access offsets, array size and scenarios.

        Every base scenario is expanded into two entries of ``self.scenarios``:
        ``<name>-A`` with ``-XX:+AlignVector`` and ``<name>-U`` with
        ``-XX:-AlignVector``, in that order.
        """
        # Arguments: name, size, factor, operator, ir_op
        self.types = [
            Type("int",    4, -11,      "*", "MUL_VI"),
            Type("long",   8, -11,      "+", "ADD_VL"), # arm NEON does not support MulVL
            Type("short",  2, -11,      "*", "MUL_VS"),
            Type("char",   2, -11,      "*", "MUL_VS"), # char behaves like short
            Type("byte",   1, 11,       "*", "MUL_VB"),
            Type("float",  4, "1.001f", "*", "MUL_VF"),
            Type("double", 8, "1.001",  "*", "MUL_VD"),
        ]
        # Each magnitude is listed as a negative and a positive offset.
        magnitudes = [1, 2, 3, 4, 7, 8, 14, 16, 18, 20, 31, 32, 63, 64, 65,
                      128, 129, 192]  # 192 = 3 * 64
        self.offsets = [0]
        for magnitude in magnitudes:
            self.offsets += [-magnitude, magnitude]
        self.range = 512

        req_x86 =     '(os.arch=="x86" | os.arch=="i386" | os.arch=="amd64" | os.arch=="x86_64")'
        req_not_x86 = '(os.arch!="x86" & os.arch!="i386" & os.arch!="amd64" & os.arch!="x86_64")'

        req_sse4     = 'vm.cpu.features ~= ".*sse4.*"'
        req_avx1     = 'vm.cpu.features ~= ".*avx.*"'
        req_avx2     = 'vm.cpu.features ~= ".*avx2.*"'
        req_avx512   = 'vm.cpu.features ~= ".*avx512.*"'
        req_avx512bw = 'vm.cpu.features ~= ".*avx512bw.*"'

        align_0 = "-XX:-AlignVector"
        align_1 = "-XX:+AlignVector"

        v_002 = "-XX:MaxVectorSize=2"
        v_004 = "-XX:MaxVectorSize=4"
        v_008 = "-XX:MaxVectorSize=8"
        v_016 = "-XX:MaxVectorSize=16"
        v_032 = "-XX:MaxVectorSize=32"
        v_064 = "-XX:MaxVectorSize=64"

        sse4 = "-XX:UseSSE=4"
        avx1 = "-XX:UseAVX=1"
        avx2 = "-XX:UseAVX=2"
        avx3 = "-XX:UseAVX=3"
        knl = "-XX:+UseKNLSetting"

        # (scenario name, VM flags, @requires clauses)
        base_scenarios = [
            ("vanilla",       [],                  []),
            ("sse4-v016",     [sse4, v_016],       [req_x86, req_sse4]),
            ("sse4-v008",     [sse4, v_008],       [req_x86, req_sse4]),
            ("sse4-v004",     [sse4, v_004],       [req_x86, req_sse4]),
            # NOTE(review): named v002 but passes MaxVectorSize=4, so v_002
            # stays unused. Possibly deliberate (the VM may not accept 2 on
            # x86) -- confirm before changing; behavior kept as is.
            ("sse4-v002",     [sse4, v_004],       [req_x86, req_sse4]),
            ("avx1-v032",     [avx1, v_032],       [req_x86, req_avx1]),
            ("avx1-v016",     [avx1, v_016],       [req_x86, req_avx1]),
            ("avx2-v032",     [avx2, v_032],       [req_x86, req_avx2]),
            ("avx2-v016",     [avx2, v_016],       [req_x86, req_avx2]),
            ("avx512-v064",   [avx3, knl, v_064],  [req_x86, req_avx512]),
            ("avx512-v032",   [avx3, knl, v_032],  [req_x86, req_avx512]),
            ("avx512bw-v064", [avx3, v_064],       [req_x86, req_avx512bw]),
            ("avx512bw-v032", [avx3, v_032],       [req_x86, req_avx512bw]),
            ("vec-v064",      [v_064],             [req_not_x86]),
            ("vec-v032",      [v_032],             [req_not_x86]),
            ("vec-v016",      [v_016],             [req_not_x86]),
            ("vec-v008",      [v_008],             [req_not_x86]),
            ("vec-v004",      [v_004],             [req_not_x86]),
        ]

        self.scenarios = []
        for name, flags, requires in base_scenarios:
            # Aligned variant first, then unaligned; each gets its own lists.
            for suffix, align_flag in (("-A", align_1), ("-U", align_0)):
                self.scenarios.append(
                    TestScenario(name + suffix, flags + [align_flag], list(requires)))

    def get_test_list(self):
        """Return a fresh ``Test`` for every (type, offset) pair, types outermost."""
        return [Test(elem_type, offset)
                for elem_type in self.types
                for offset in self.offsets]

    def generate(self, class_name, path_name, bugid, package_name):
        """Generate the Java test source and write it to disk.

        The file is written to ``{path_name}/{class_name}.java`` and consists
        of the copyright header, an explanatory comment, one jtreg header per
        scenario, and the test class itself.

        Args:
            class_name: name of the generated Java class (and file).
            path_name: existing directory the file is written into.
            bugid: text placed after ``@bug`` in every jtreg header.
            package_name: Java package of the generated class.
        """
        lines = [
            COPYRIGHT,
            "",
            "/*",
            " * Summary:",
            " *   Test SuperWord vectorization with different access offsets",
            " *   and various MaxVectorSize values, and +- AlignVector.",
            " *",
            " * Note: this test is auto-generated. Please modify / generate with script:",
            " *       https://bugs.openjdk.org/browse/JDK-8334431",
            " *",
            " * Types: " + ", ".join([t.name for t in self.types]),
            " * Offsets: " + ", ".join([str(o) for o in self.offsets]),
            " *",
            " * Checking if we should vectorize is a bit complicated. It depends on",
            " * Matcher::vector_width_in_bytes, of the respective platforms (eg. x86.ad)",
            " * This vector_width can be further constrained by MaxVectorSize.",
            " *",
            " * With '-XX:-AlignVector', we vectorize if:",
            " *  - Vectors have at least 4 bytes:    vector_width >= 4",
            " *  - Vectors hold at least 2 elements: vector_width >= 2 * sizeofop(velt_type)",
            " *    -> min_vector_width = max(4, 2 * sizeofop(velt_type))",
            " *    -> simplifies to: vector_width >= min_vector_width",
            " *  - No cyclic dependency:",
            " *    - Access: data[i + offset] = data[i] * fac;",
            " *    - byte_offset = offset * sizeofop(type)",
            " *    - Cyclic dependency if: 0 < byte_offset < vector_width",
            " *",
            " * Note: sizeofop(type) = sizeof(type), except sizeofop(char) = 2",
            " *",
            " * Different types can lead to different vector_width. This depends on",
            " * the CPU-features.",
            " *",
            " * Definition:",
            " *     MaxVectorSize: limit through flag",
            " *     vector_width: limit given by specific CPU feature for a specific velt_type",
            " *     actual_vector_width: what is actually vectorized with",
            " *     min_vector_width: what is minimally required for vectorization",
            " *",
            " *     min_vector_width = max(4, 2 * sizeofop(velt_type))",
            " *     MaxVectorSize >= vector_width >= actual_vector_width >= min_vector_width",
            " *",
            " * In general, we cannot easily specify negative IR rules, that require no",
            " * vectorization to happen. We may improve the SuperWord algorithm later,",
            " * or some additional optimization collapses some Loads, and suddenly cyclic",
            " * dependency disappears, and we can vectorize.",
            " *",
            " * With '-XX:+AlignVector' we do the following:",
            " *",
            " * Must vectorize cleanly if:",
            " *   1) guaranteed no misalignment AND",
            " *   2) guaranteed no cyclic dependency",
            " *",
            " * Must not vectorize at all if:",
            " *   1) guaranteed misalignment AND",
            " *   2) guaranteed no cyclic dependency",
            " *",
            " * We could imagine a case with cyclic dependency, where C2 detects",
            " * that only the first load is needed, and so no vectorization is",
            " * required for it, and hence the store vector can be aligned.",
            " *",
            " * The alignment criteria is",
            " *     byte_offset % aw == 0",
            " * where align width (aw) is",
            " *     aw = min(actual_vector_width, ObjectAlignmentInBytes)",
            " * For simplicity, we assume that ObjectAlignmentInBytes == 8,",
            " * which currently can only be changed manually and then no IR",
            " * rule is run.",
            " * This allows us to do the computation statically.",
            " * Further, we define:",
            " *     aw_min = min(min_vector_width, ObjectAlignmentInBytes)",
            " *     aw_max = min(vector_width, ObjectAlignmentInBytes)",
            " *     aw_min <= aw <= aw_max",
            " *",
            " * Again, we have no cyclic dependency, except when:",
            " *     byte_offset > 0 and p.vector_width > byte_offset",
            " * Here we must ensure that:",
            " *     byte_offset >= MaxVectorSize",
            " *",
            " * Guaranteed no misalignment:",
            " *     byte_offset % aw_max == 0",
            " *       implies",
            " *         byte_offset % aw == 0",
            " *",
            " * Guaranteed misalignment:",
            " *     byte_offset % aw_min != 0",
            " *       implies",
            " *         byte_offset % aw != 0",
            " *",
            " */",
            "",
        ]
        # The jtreg headers are emitted ahead of the package declaration.
        self.generate_jtreg_tests(lines, class_name, bugid, package_name)
        lines.append(f"package {package_name};")
        lines.append("import compiler.lib.ir_framework.*;")
        lines.append("")
        lines.append(f"public class {class_name} {{")
        lines.append(f"    static final int RANGE = {self.range};")
        lines.append("")
        self.generate_gold_def(lines)
        self.generate_static_block(lines)
        self.generate_main(lines, class_name, package_name)
        self.generate_tests(lines)
        self.generate_inits(lines)
        self.generate_verifys(lines)
        lines.append("}")
        # Explicit encoding keeps the output independent of the host locale.
        with open(f"{path_name}/{class_name}.java", "w", encoding="utf-8") as f:
            f.writelines(f"{line}\n" for line in lines)

    def generate_jtreg_tests(self, lines, class_name, bugid, package_name):
        """Append one jtreg header comment per scenario to ``lines``.

        Each header runs ``{package_name}.{class_name}`` with the scenario
        name as its only argument and is followed by an empty line.
        """
        main_class = f"{package_name}.{class_name}"
        for scenario in self.scenarios:
            header = [
                "/*",
                f" * @test id={scenario.name}",
                f" * @bug {bugid}",
                " * @summary Test SuperWord: vector size, offsets, dependencies, alignment.",
            ]
            # The C2 requirement is only emitted for scenarios that carry
            # other requirements as well.
            if len(scenario.requires) > 0:
                header.append(" * @requires vm.compiler2.enabled")
            header.extend(f" * @requires {req}" for req in scenario.requires)
            header.append(" * @library /test/lib /")
            # Variant with an explicit timeout, currently disabled:
            #   f" * @run driver/timeout=400 {package_name}.{class_name} {scenario.name}"
            header.append(f" * @run driver {main_class} {scenario.name}")
            header.append(" */")
            header.append("")
            lines.extend(header)

    def generate_gold_def(self, lines):
        """Append one static reference-result array declaration per test."""
        for case in self.get_test_list():
            elem = case.t.name
            lines.append(f"    static {elem}[] {case.gold()} = new {elem}[RANGE];")
        lines.append("")

    def generate_main(self, lines, class_name, package_name):
        """Append the Java ``main`` method to ``lines``.

        The emitted method configures the TestFramework, selects the VM flags
        of the scenario named by its single argument and starts the framework.
        """
        target = f"{package_name}.{class_name}"
        lines.extend([
            '    public static void main(String args[]) {',
            f'        TestFramework framework = new TestFramework({class_name}.class);',
            '        framework.addFlags("-XX:-TieredCompilation",',
            f'                           "-XX:CompileCommand=compileonly,{target}::init",',
            f'                           "-XX:CompileCommand=compileonly,{target}::test*",',
            f'                           "-XX:CompileCommand=compileonly,{target}::verify",',
            '                           "-XX:+IgnoreUnrecognizedVMOptions", "-XX:LoopUnrollLimit=250");',
            '',
            '        if (args.length != 1) {',
            '            throw new RuntimeException("Test requires exactly one argument!");',
            '        }',
            '',
            '        switch (args[0]) {',
        ])
        # One switch case per scenario, adding that scenario's flags.
        for scenario in self.scenarios:
            lines.extend([
                f'        case "{scenario.name}":',
                f'            framework.addFlags({scenario.format_flags()});',
                '            break;',
            ])
        lines.extend([
            '        default:',
            '            throw new RuntimeException("Test argument not recognized: " + args[0]);',
            '        }',
            '        framework.start();',
            '    }',
            '',
        ])

    def generate_static_block(self, lines):
        """Append the Java static initializer that fills the reference arrays.

        For every test the emitted code initializes the reference array and
        then runs the test method on it in place.
        """
        body = []
        for case in self.get_test_list():
            gold = case.gold()
            body.append(f"        init({gold});")
            body.append(f"        {case.test()}({gold}, {gold});")
        lines.append("    static {")
        lines.append("        // compute the gold standard in interpreter mode")
        lines.extend(body)
        lines.append("    }")
        lines.append("")

    def generate_tests(self, lines):
        """Append, for every test, its IR rules, ``@Test`` and ``@Run`` methods.

        Per platform of the test's type one rule is emitted for -AlignVector
        and, where the alignment outcome is statically known, one rule for
        +AlignVector.
        """
        object_alignment = 8  # ObjectAlignmentInBytes == 8

        def start_rule(platform, align_vector, nodes):
            # Every rule starts with the AlignVector setting and the same nodes.
            rule = PlatformIRRule(platform)
            rule.add_pre_constraint("AlignVector", align_vector)
            for node in nodes:
                rule.add_node(node)
            return rule

        lines.append("    // ------------------- Tests -------------------")
        lines.append("")
        for test in self.get_test_list():
            elem = test.t
            vector_nodes = (
                f"IRNode.LOAD_VECTOR_{elem.letter}",
                f"IRNode.{elem.ir_op}",
                "IRNode.STORE_VECTOR",
            )
            # at least 4 byte, and at least 2 elements in vector
            min_vector_width = max(4, 2 * elem.size)
            byte_offset = test.offset * elem.size
            aw_min = min(min_vector_width, object_alignment)

            lines.append("    @Test")
            # IR rules
            for p in elem.platforms():
                elements = p.vector_width // elem.size
                max_pre = "max " if p.name == "sve" else ""
                lines.append(f"    // CPU: {p.name} -> {max_pre}vector_width: {p.vector_width} -> {max_pre}elements in vector: {elements}")
                # positive offset smaller than vector_width leads to cyclic
                # dependency whenever byte_offset < MaxVectorSize
                cyclic = 0 < byte_offset < p.vector_width

                ###############  -AlignVector
                rule = start_rule(p, IRBool.makeFalse(), vector_nodes)
                rule.add_constraint("MaxVectorSize", IRRange.lower_bound(min_vector_width))
                if cyclic:
                    lines.append(f"    //   positive byte_offset {byte_offset} can lead to cyclic dependency")
                    rule.add_constraint("MaxVectorSize", IRRange.upper_bound(byte_offset))
                rule.generate(lines)

                ###############  +AlignVector
                # Sadly, ObjectAlignmentInBytes is 64bit only flag, so it
                # cannot be added as a pre-constraint of the rules below.
                aw_max = min(p.vector_width, object_alignment)
                aligned = (byte_offset % aw_max) == 0
                misaligned = (byte_offset % aw_min) != 0
                if not aligned and not misaligned:
                    lines.append("    //   Alignment unclear -> no IR rule for -XX:+AlignVector.")
                    continue
                if aligned:
                    # implies: (byte_offset % aw) == 0 -> alignment guaranteed
                    lines.append("    //   Expect alignment.")
                    rule = start_rule(p, IRBool.makeTrue(), vector_nodes)
                    rule.add_constraint("MaxVectorSize", IRRange.lower_bound(min_vector_width))
                else:
                    # implies: (byte_offset % aw) != 0 -> misalignment guaranteed
                    lines.append("    //   Expect misalignment.")
                    rule = start_rule(p, IRBool.makeTrue(), vector_nodes)
                    rule.expect_no_nodes()
                if cyclic:
                    rule.add_constraint("MaxVectorSize", IRRange.upper_bound(byte_offset))
                rule.generate(lines)

            # test method
            test_name = test.test()
            first = 0 if (test.offset >= 0) else abs(test.offset)
            end_diff = "" if (test.offset <= 0) else f" - {abs(test.offset)}"
            lines.extend([
                f"    public static void {test_name}({elem.name}[] data, {elem.name}[] data_2) {{",
                f"        for (int j = {first}; j < RANGE{end_diff}; j++) {{",
                f"            data_2[j + {test.offset}] = ({elem.name})(data[j] {elem.operator} ({elem.name}){elem.factor});",
                "        }",
                "    }",
                "",
            ])
            # run method
            lines.extend([
                f'    @Run(test = "{test_name}")',
                "    @Warmup(0)",
                f"    public static void {test.run()}() {{",
                f"        {elem.name}[] data = new {elem.name}[RANGE];",
                "        init(data);",
                f"        {test_name}(data, data);",
                f'        verify("{test_name}", data, {test.gold()});',
                "    }",
                "",
            ])

    def generate_inits(self, lines):
        """Append one Java ``init`` overload per element type.

        Each overload fills the array with its own indices cast to the
        element type.
        """
        lines.extend([
            "    // ------------------- Initialization -------------------",
            "",
        ])
        for elem in self.types:
            lines.extend([
                f"    static void init({elem.name}[] data) {{",
                "        for (int j = 0; j < RANGE; j++) {",
                f"            data[j] = ({elem.name})j;",
                "        }",
                "    }",
                "",
            ])

    def generate_verifys(self, lines):
        """Append one Java ``verify`` overload per element type.

        Each overload compares an array element-wise against the reference
        array and throws on the first mismatch. No empty line is emitted
        between the overloads.
        """
        lines.extend([
            "    // ------------------- Verification -------------------",
            "",
        ])
        for elem in self.types:
            lines.extend([
                f"    static void verify(String context, {elem.name}[] data, {elem.name}[] gold) {{",
                "        for (int i = 0; i < RANGE; i++) {",
                "            if (data[i] != gold[i]) {",
                '                throw new RuntimeException(" Invalid " + context + " result: data[" + i + "]: " + data[i] + " != " + gold[i]);',
                "            }",
                "        }",
                "    }",
            ])
def main():
    """Generate ``TestDependencyOffsets.java`` into the superword test directory.

    The output path is relative, so it is resolved against the current
    working directory.
    """
    generator = Generator()
    generator.generate(
        class_name="TestDependencyOffsets",
        path_name="test/hotspot/jtreg/compiler/loopopts/superword",
        bugid="8298935 8308606 8310308 8312570 8310190",  # bug IDs for @bug
        package_name="compiler.loopopts.superword",
    )

# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
