ICU-8972 redo writing of pnames_data.h: write properties & their values in uchar.h order, write strings rather than double indirection through name groups & string table; stable output with minor, proportional diffs in version upgrades, and much simpler code

X-SVN-Rev: 31164
2025-04-10 07:39:16 +00:00 · 2011-12-21 07:51:56 +00:00 · 2011-12-21 07:51:56 +00:00 · 05d42d4ed3
commit 05d42d4ed3
parent cec5a252fd
1 changed files with 145 additions and 253 deletions
--- a/tools/unicode/py/preparseucd.py
+++ b/tools/unicode/py/preparseucd.py
@ -110,8 +110,7 @@ _ignored_properties = set((
 # 0: Type of property (binary, enum, ...)
 # 1: List of aliases; short & long name followed by other aliases.
 #    The short name is "" if it is listed as "n/a" in PropertyValueAliases.txt.
-# 2: Dictionary, maps short property value names
-#    initially to None, later to ICU4C API enum constants.
+# 2: Set of short property value names.
 # 3: Dictionary of property values.
 #    For Catalog & Enumerated properties,
 #    maps each value name to a list of aliases.
@ -140,11 +139,17 @@ _defaults = {}
 # Initialized after parsing is done.
 _null_or_defaults = {}

-# Dictionary of short property names mapped to ICU4C UProperty enum constants.
-_property_name_to_enum = {}
+# List of properties with an ICU UProperty enum.
+# Each item is an (enum, pname, values) tuple.
+# - enum: the ICU enum UProperty constant string
+# - pname: the UCD short property name
+# - values: list of (enum, vname) pairs per property value
+#   - enum: the ICU property value's enum constant string
+#   - vname: the UCD short property value name
+_icu_properties = []

-# Dictionary of short gc value names mapped to UCharCategory enum constants.
-_gc_vname_to_enum = {}
+# Dictionary of short property names mapped to _icu_properties items.
+_pname_to_icu_prop = {}

 _non_alnum_re = re.compile("[^a-zA-Z0-9]")

@ -179,8 +184,7 @@ def GetShortPropertyName(pname):
  if pname in _null_values: return pname  # pname is already the short name.
  prop = GetProperty(pname)
  if not prop: return ""  # For ignored properties.
-  name = prop[1][0]
-  return name if name else prop[1][1]  # Long name if no short name.
+  return prop[1][0] or prop[1][1]  # Long name if no short name.


 def GetShortPropertyValueName(prop, vname):
@ -194,8 +198,7 @@ def GetShortPropertyValueName(prop, vname):
      raise NameError("unknown value name %s for property %s\n" %
                      (vname, prop[1][0]))
    values[vname] = aliases
-  short_name = aliases[0]
-  return short_name if short_name else aliases[1]  # Long name if no short name.
+  return aliases[0] or aliases[1]  # Long name if no short name.


 def NormalizePropertyValue(prop, vname):
@ -456,7 +459,7 @@ def ParsePropertyAliases(in_file):
          _null_values[name] = 0
        else:
          _null_values[name] = null_value
-        prop = (prop_type, aliases, {}, {})
+        prop = (prop_type, aliases, set(), {})
        for alias in aliases:
          _properties[alias] = prop
          _properties[NormPropName(alias)] = prop
@ -470,13 +473,13 @@ def ParsePropertyAliases(in_file):
  # Turkic_Case_Folding: The 'T' mappings in CaseFolding.txt.
  name = "Turkic_Case_Folding"
  _null_values[name] = ""
-  prop = ("String", [name, name], {}, {})
+  prop = ("String", [name, name], set(), {})
  _properties[name] = prop
  _properties[NormPropName(name)] = prop
  # Conditional_Case_Mappings: SpecialCasing.txt lines with conditions.
  name = "Conditional_Case_Mappings"
  _null_values[name] = ""
-  prop = ("Miscellaneous", [name, name], {}, {})
+  prop = ("Miscellaneous", [name, name], set(), {})
  _properties[name] = prop
  _properties[NormPropName(name)] = prop
  # lccc = ccc of first cp in canonical decomposition.
@ -497,7 +500,7 @@ def ParsePropertyAliases(in_file):
  # Script_Extensions
  if "scx" not in _properties:
    _null_values["scx"] = ""
-    prop = ("Miscellaneous", ["scx", "Script_Extensions"], {}, {})
+    prop = ("Miscellaneous", ["scx", "Script_Extensions"], set(), {})
    _properties["scx"] = prop
    _properties["Script_Extensions"] = prop
    _properties["scriptextensions"] = prop
@ -541,7 +544,7 @@ def ParsePropertyValueAliases(in_file):
        if short_name == "n/a":  # no short name
          fields[0] = ""
          short_name = fields[1]
-        prop[2][short_name] = None
+        prop[2].add(short_name)
        values = prop[3]
        for alias in fields:
          if alias:
@ -568,14 +571,14 @@ def ParsePropertyValueAliases(in_file):
  # Add ISO 15924-only script codes.
  # Only for the ICU script code API, not necessary for parsing the UCD.
  script_prop = _properties["sc"]
-  short_script_names = script_prop[2]  # dict
+  short_script_names = script_prop[2]  # set
  script_values = script_prop[3]  # dict
  remove_scripts = []
  for script in _scripts_only_in_iso15924:
    if script in short_script_names:
      remove_scripts.append(script)
    else:
-      short_script_names[script] = None
+      short_script_names.add(script)
      # Do not invent a Unicode long script name before the UCD adds the script.
      script_list = [script, script]  # [short, long]
      script_values[script] = script_list
@ -1470,30 +1473,6 @@ def PrintNameStats():

 # ICU API ------------------------------------------------------------------ ***

-# Sample line to match:
-#    USCRIPT_LOMA   = 139,/* Loma */
-_uscript_re = re.compile(
-    " *(USCRIPT_[A-Z_]+) *= *[0-9]+ *, */\* *([A-Z][a-z]{3}) *\*/")
-
-def ParseUScriptHeader(icu_src_root):
-  uscript_path = os.path.join(icu_src_root, "source",
-                              "common", "unicode", "uscript.h")
-  short_script_name_to_enum = _properties["sc"][2]
-  scripts_not_in_ucd = set()
-  with open(uscript_path, "r") as uscript_file:
-    for line in uscript_file:
-      match = _uscript_re.match(line)
-      if match:
-        (script_enum, script_code) = match.group(1, 2)
-        if script_code not in short_script_name_to_enum:
-          scripts_not_in_ucd.add(script_code)
-        else:
-          short_script_name_to_enum[script_code] = script_enum
-  if scripts_not_in_ucd:
-    raise ValueError("uscript.h has UScript constants for scripts "
-                     "not in the UCD nor in ISO 15924: %s" % scripts_not_in_ucd)
-
-
 # Sample line to match:
 #    UCHAR_UNIFIED_IDEOGRAPH=29,
 _uchar_re = re.compile(
@ -1501,7 +1480,7 @@ _uchar_re = re.compile(

 # Sample line to match:
 #    /** Zs @stable ICU 2.0 */
-_gc_comment_re = re.compile(" */\*\* *([A-Z][a-z]) *")
+_gc_comment_re = re.compile(" */\*\* *([A-Z][a-z]) ")

 # Sample line to match:
 #    U_SPACE_SEPARATOR         = 12,
@ -1509,7 +1488,7 @@ _gc_re = re.compile(" *(U_[A-Z_]+) *= *[0-9]+,")

 # Sample line to match:
 #    /** L @stable ICU 2.0 */
-_bc_comment_re = re.compile(" */\*\* *([A-Z]{1,3}) *")
+_bc_comment_re = re.compile(" */\*\* *([A-Z]{1,3}) ")

 # Sample line to match:
 #    U_LEFT_TO_RIGHT               = 0,
@ -1533,21 +1512,18 @@ def ParseUCharHeader(icu_src_root):
  uchar_path = os.path.join(icu_src_root, "source",
                            "common", "unicode", "uchar.h")
  with open(uchar_path, "r") as uchar_file:
-    mode = ""
-    prop = None
-    comment_value = "??"
+    mode = ""  # Mode string (=pname) during context-sensitive parsing.
+    comment_value = ""  # Property value from a comment preceding an enum.
+    # Note: The enum UProperty is first in uchar.h, before the enums for values.
    for line in uchar_file:
      # Parse some enums via context-sensitive "modes".
      # Necessary because the enum constant names do not contain
      # enough information.
      if "enum UCharCategory" in line:
        mode = "gc"
-        prop = _properties["gc"]
+        comment_value = ""
        continue
      if mode == "gc":
-        # Leave the normal short-to-enum map which is shared between gc & gcm
-        # with enums like U_GC_ZS_MASK.
-        # For writing gc enums to pnames_data.h use _gc_vname_to_enum.
        if line.startswith("}"):
          mode = ""
          continue
@ -1556,15 +1532,17 @@ def ParseUCharHeader(icu_src_root):
          comment_value = match.group(1)
          continue
        match = _gc_re.match(line)
-        if match:
+        if match and comment_value:
          gc_enum = match.group(1)
+          prop = _properties["gc"]
          vname = GetShortPropertyValueName(prop, comment_value)
-          _gc_vname_to_enum[vname] = gc_enum
+          icu_values = _pname_to_icu_prop["gc"][2]
+          icu_values.append((gc_enum, vname))
+        comment_value = ""
        continue
      if "enum UCharDirection {" in line:
        mode = "bc"
-        prop = _properties["bc"]
-        comment_value = "??"
+        comment_value = ""
        continue
      if mode == "bc":
        if line.startswith("}"):
@ -1575,10 +1553,13 @@ def ParseUCharHeader(icu_src_root):
          comment_value = match.group(1)
          continue
        match = _bc_re.match(line)
-        if match:
+        if match and comment_value:
          bc_enum = match.group(1)
+          prop = _properties["bc"]
          vname = GetShortPropertyValueName(prop, comment_value)
-          prop[2][vname] = bc_enum
+          icu_values = _pname_to_icu_prop["bc"][2]
+          icu_values.append((bc_enum, vname))
+        comment_value = ""
        continue
      # No mode, parse enum constants whose names contain
      # enough information to parse without requiring context.
@ -1589,7 +1570,9 @@ def ParseUCharHeader(icu_src_root):
          # Ignore "UCHAR_BINARY_LIMIT=57," etc.
          continue
        pname = GetShortPropertyName(prop_enum[6:])
-        _property_name_to_enum[pname] = prop_enum
+        icu_prop = (prop_enum, pname, [])
+        _icu_properties.append(icu_prop)
+        _pname_to_icu_prop[pname] = icu_prop
        continue
      match = _ublock_re.match(line)
      if match:
@ -1598,107 +1581,93 @@ def ParseUCharHeader(icu_src_root):
          continue
        prop = _properties["blk"]
        vname = GetShortPropertyValueName(prop, prop_enum[7:])
-        prop[2][vname] = prop_enum
+        icu_values = _pname_to_icu_prop["blk"][2]
+        icu_values.append((prop_enum, vname))
        continue
      match = _prop_and_value_re.match(line)
      if match:
        (prop_enum, vname) = match.group(1, 3)
        if vname == "COUNT" or _prop_and_alias_re.match(line):
          continue
-        prop = GetProperty(match.group(2))
+        pname = GetShortPropertyName(match.group(2))
+        prop = _properties[pname]
        vname = GetShortPropertyValueName(prop, vname)
-        prop[2][vname] = prop_enum
+        icu_values = _pname_to_icu_prop[pname][2]
+        icu_values.append((prop_enum, vname))
+  # ccc, lccc, tccc use their numeric values as "enum" values.
+  # In the UCD data, these numeric values are the first value names,
+  # followed by the short & long value names.
+  # List the ccc values in numeric order.
+  prop = _properties["ccc"]
+  icu_values = _pname_to_icu_prop["ccc"][2]
+  for ccc in sorted([int(name) for name in prop[2]]):
+    icu_values.append((ccc, str(ccc)))
+  _pname_to_icu_prop["lccc"][2].extend(icu_values)  # Copy ccc -> lccc.
+  _pname_to_icu_prop["tccc"][2].extend(icu_values)  # Copy ccc -> tccc.
+
  # No need to parse predictable General_Category_Mask enum constants.
-  short_gcm_name_to_enum = _properties["gcm"][2]
-  for value in short_gcm_name_to_enum:
-    short_gcm_name_to_enum[value] = "U_GC_" + value.upper() + "_MASK"
+  # Just define them in ASCII order.
+  prop = _properties["gcm"]
+  icu_values = _pname_to_icu_prop["gcm"][2]
+  for vname in sorted(prop[2]):
+    icu_values.append(("U_GC_" + vname.upper() + "_MASK", vname))
  # Hardcode known values for the normalization quick check properties,
  # see unorm2.h for the UNormalizationCheckResult enum.
-  short_name_to_enum = _properties["NFC_QC"][2]
-  short_name_to_enum["N"] = "UNORM_NO"
-  short_name_to_enum["Y"] = "UNORM_YES"
-  short_name_to_enum["M"] = "UNORM_MAYBE"
-  short_name_to_enum = _properties["NFKC_QC"][2]
-  short_name_to_enum["N"] = "UNORM_NO"
-  short_name_to_enum["Y"] = "UNORM_YES"
-  short_name_to_enum["M"] = "UNORM_MAYBE"
+  icu_values = _pname_to_icu_prop["NFC_QC"][2]
+  icu_values.append(("UNORM_NO", "N"))
+  icu_values.append(("UNORM_YES", "Y"))
+  icu_values.append(("UNORM_MAYBE", "M"))
+  _pname_to_icu_prop["NFKC_QC"][2].extend(icu_values)  # Copy NFC -> NFKC.
  # No "maybe" values for NF[K]D.
-  short_name_to_enum = _properties["NFD_QC"][2]
-  short_name_to_enum["N"] = "UNORM_NO"
-  short_name_to_enum["Y"] = "UNORM_YES"
-  short_name_to_enum = _properties["NFKD_QC"][2]
-  short_name_to_enum["N"] = "UNORM_NO"
-  short_name_to_enum["Y"] = "UNORM_YES"
+  icu_values = _pname_to_icu_prop["NFD_QC"][2]
+  icu_values.append(("UNORM_NO", "N"))
+  icu_values.append(("UNORM_YES", "Y"))
+  _pname_to_icu_prop["NFKD_QC"][2].extend(icu_values)  # Copy NFD -> NFKD.


-def WritePNamesDataHeader(out_path):
-  # Build a sorted list of (key0, enum, aliases) tuples
-  # to emulate the output order of the old genpname/preparse.pl.
-  #   key0 is either a preparse.pl property type string (for property names)
-  #        or a Unicode short property name (for property value names).
-  #   enum is the ICU4C enum constant name.
-  #   aliases is the tuple of the property's or value's names and aliases.
-  #       (We use a tuple, not the original list,
-  #        so that we can use it as a dict key.)
-  # TODO: once we are sure this works, simplify the order;
-  #       for example, change all "_bp" etc. to just ""
-  #       (outputs property names first in enum order),
-  #       and sorting ccc by numbers not strings
-  # TODO: simplify further, to make pnames_data.h more stable;
-  #       try not to print string or group index numbers
-  # TODO: wiki/ReviewTicket8972 with diff links
-  prop_type_to_old_type = {
-    "Binary": "_bp",
-    "Bitmask": "_op",
-    "Catalog": "_ep",
-    "Enumerated": "_ep",
-    "Miscellaneous": "_sp",
-    "Numeric": "_dp",
-    "String": "_sp"
-  }
-  pnames_data = [
-    ("binprop", "0", tuple(_binary_values["N"])),
-    ("binprop", "1", tuple(_binary_values["Y"]))
-  ]
+# Sample line to match:
+#    USCRIPT_LOMA   = 139,/* Loma */
+_uscript_re = re.compile(
+    " *(USCRIPT_[A-Z_]+) *= *[0-9]+ *, */\* *([A-Z][a-z]{3}) *\*/")
+
+def ParseUScriptHeader(icu_src_root):
+  uscript_path = os.path.join(icu_src_root, "source",
+                              "common", "unicode", "uscript.h")
+  icu_values = _pname_to_icu_prop["sc"][2]
+  with open(uscript_path, "r") as uscript_file:
+    for line in uscript_file:
+      match = _uscript_re.match(line)
+      if match:
+        (script_enum, script_code) = match.group(1, 2)
+        icu_values.append((script_enum, script_code))
+
+
+def CheckPNamesData():
+  """Checks that every ICU property has a full set of value enum constants,
+  and that the _icu_properties value names map back to the UCD."""
  missing_enums = []
-  # Only properties that have ICU API.
-  for (pname, prop_enum) in _property_name_to_enum.iteritems():
+  for (p_enum, pname, values) in _icu_properties:
    prop = _properties[pname]
-    # Sometimes the uchar.h UProperty type differs
-    # from the PropertyAliases.txt type.
-    if pname == "age":
-      type = "_sp"
-    elif pname in ("gcm", "scx"):
-      type = "_op"
-    else:
-      type = prop_type_to_old_type[prop[0]]
-    pnames_data.append((type, prop_enum, tuple(prop[1])))
-    if type != "_bp" and pname != "age":
-      short_name_to_enum = prop[2]
-      if pname.endswith("ccc"):
-        # ccc, lccc, tccc use the string forms of their numeric values
-        # as "enum" values.
-        # In the UCD data, these numeric strings are the first value names,
-        # followed by the short & long value names.
-        # Omit the numeric strings from the aliases as well.
-        for name in short_name_to_enum:
-          pnames_data.append((pname, name, tuple(prop[3][name][1:])))
-      else:
-        if pname == "gc":
-          # See comment about _gc_vname_to_enum in ParseUCharHeader().
-          short_name_to_enum = _gc_vname_to_enum
-        for (name, enum) in short_name_to_enum.iteritems():
-          if enum:
-            pnames_data.append((pname, enum, tuple(prop[3][name])))
-          else:
-            missing_enums.append((pname, name))
+    vnames = set(prop[2])  # Modifiable copy of the set of short value names.
+    for (v_enum, vname) in values:
+      if vname not in vnames:
+        raise ValueError("%s = %s (uchar.h %s) not in the UCD\n" %
+                         (pname, vname, v_enum))
+      vnames.remove(vname)
+    # Exceptions to the all-values check:
+    # - ICU does not have specific enum values for binary No/Yes.
+    # - ICU represents Age values via UVersionInfo rather than enum constants.
+    # - gc: ICU enum UCharCategory only has the single-category values.
+    #       (ICU's gcm property has all of the UCD gc property values.)
+    if vnames and not (prop[0] == "Binary" or pname in ("age", "gc")):
+      missing_enums.append((pname, vnames))
  if missing_enums:
    raise ValueError(
        "missing uchar.h enum constants for some property values: %s" %
        missing_enums)
-  pnames_data.sort()

-  # Write pnames_data.h.
+
+def WritePNamesDataHeader(out_path):
  year = datetime.date.today().strftime("%Y")
  with open(out_path, "w") as out_file:
    out_file.write("""/**
@ -1707,133 +1676,53 @@ def WritePNamesDataHeader(out_path):
 * others. All Rights Reserved.
 *
 * machine-generated by: icu/tools/unicode/py/preparseucd.py
+ *
+ * Unicode """ + _ucd_version + """
 */

 """)

-    out_file.write("/* Unicode version %s */\n" % _ucd_version)
-    v = _ucd_version.split(".")
-    while len(v) < 4: v.append("0")
-    for i in xrange(4):
-      out_file.write("const uint8_t VERSION_%d = %s;\n" % (i, v[i]))
-    out_file.write("\n")
-
-    # Write the string table with all of the names and aliases
-    # of all of the properties and their values.
-    # Unique strings in ASCII order.
-    # The first string must be the empty string.
-    strings_set = set([""])
-    for (key0, enum, aliases) in pnames_data:
-      for s in aliases: strings_set.add(s)
-    strings = sorted(strings_set)
-
-    out_file.write("const int32_t STRING_COUNT = %d;\n\n" % len(strings))
-
-    # While printing, create a mapping from string table entry to index.
-    string_to_id = {}
-    out_file.write("const AliasName STRING_TABLE[] = {\n")
-    for s in strings:
-      i = len(string_to_id)
-      out_file.write('    AliasName("%s", %d),\n' % (s, i))
-      string_to_id[s] = i
+    # Write an array of "binprop" Value object initializers
+    # with the value aliases shared among all binary properties.
+    out_file.write("const int32_t VALUES_binprop_COUNT = 2;\n\n")
+    out_file.write("const Value VALUES_binprop[] = {\n")
+    out_file.write('    Value(0, "%s"),\n' % " ".join(_binary_values["N"]))
+    out_file.write('    Value(1, "%s"),\n' % " ".join(_binary_values["Y"]))
    out_file.write("};\n\n")

-    # Emit the name group table.
-    # [A table of name groups. A name group is the list of names and aliases
-    # for a property or property value.
-    # The name group table looks like this:
-    #
-    #  114, -115, 116, -117, 0, -118, 65, -64, ...
-    #  [0]        [2]        [4]      [6]
-    #
-    # The entry at [0] consists of 2 strings, 114 and 115.
-    # The entry at [2] consists of 116 and 117, etc.
-    # The last entry is negative.
-
-    # Build the name group list with nameGroup indices.
-    name_groups = []
-    # Count the total number of values, not just the groups.
-    name_groups_total_length = 0
-
-    # Check for duplicate name groups, and reuse the first of a kind.
-    group_to_int = {}
-    for (key0, enum, aliases) in pnames_data:
-      index = group_to_int.get(aliases)
-      if index == None:
-        index = name_groups_total_length
-        group_to_int[aliases] = index
-        name_groups.append([string_to_id[s] for s in aliases])
-        name_groups_total_length += len(aliases)
-
-    out_file.write("const int32_t NAME_GROUP_COUNT = %d;\n\n" %
-                   name_groups_total_length)
-
-    out_file.write("int32_t NAME_GROUP[] = {\n")
-    # Emit one group per line, with annotations.
-    max_names = 0  # Maximum number of names & aliases per item.
-    start = 0
-    for group in name_groups:
-      line = "    "
-      aliases = []  # For comments.
-      for i in group[:-1]:
-        line += "%d, " % i
-        aliases.append('"%s"' % strings[i])
-      # Negative entry terminates the group.
-      i = group[-1]
-      line += "%d, " % -i
-      aliases.append('"%s"' % strings[i])
-      out_file.write(
-          line + " " * (24 - len(line)) +
-          "/* %3d: " % start + ", ".join(aliases) + " */\n")
-      length = len(group)
-      if length > max_names: max_names = length
-      start += length
-    out_file.write("};\n\n")
-
-    out_file.write("#define MAX_NAMES_PER_GROUP %d\n\n" % max_names)
-
-    # Emit the enumerated property values.
-    i = 0
-    while i < len(pnames_data):
-      pname = pnames_data[i][0]
-      if pname.startswith("_"):
-        i += 1
-        continue
-      if pname == "binprop":
-        count = 2
-      elif pname == "gc":
-        count = len(_gc_vname_to_enum)
-      else:
-        count = len(_properties[pname][2])
+    # For each property with named values, write an array of
+    # Value object initializers with the value enum and the aliases.
+    for (p_enum, pname, values) in _icu_properties:
+      if not values: continue
      out_file.write("const int32_t VALUES_%s_COUNT = %d;\n\n" %
-                     (pname, count))
-      out_file.write("const Alias VALUES_%s[] = {\n" % pname)
-      limit = i + count
-      while i < limit:
-        (pname, enum, aliases) = pnames_data[i]
-        out_file.write("    Alias((int32_t) %s, %d),\n" %
-                       (enum, group_to_int[aliases]))
-        i += 1
+                     (pname, len(values)))
+      out_file.write("const Value VALUES_%s[] = {\n" % pname)
+      for (v_enum, vname) in values:
+        aliases = _properties[pname][3][vname]
+        # ccc, lccc, tccc: Omit the numeric strings from the aliases.
+        # (See the comment about ccc in the PropertyValueAliases.txt header.)
+        if pname.endswith("ccc"): aliases = aliases[1:]
+        cast = "(int32_t)" if pname == "gcm" else ""
+        out_file.write('    Value(%s%s, "%s"),\n' %
+                       (cast, v_enum, " ".join(aliases)))
      out_file.write("};\n\n")

-    # Emit the top-level properties (binary, enumerated, etc.).
-    out_file.write("const int32_t PROPERTY_COUNT = %d;\n\n" %
-                   len(_property_name_to_enum))
-    out_file.write("const Property PROPERTY[] = {\n")
-    for (pname, enum, aliases) in pnames_data:
-      if not pname.startswith("_"): continue
-      pname = aliases[0]
-      if not pname: pname = aliases[1]  # Long name if no short name.
+    # For each property, write a Property object initializer
+    # with the property enum, its aliases, and a reference to its values.
+    out_file.write("const int32_t PROPERTIES_COUNT = %d;\n\n" %
+                   len(_icu_properties))
+    out_file.write("const Property PROPERTIES[] = {\n")
+    for (enum, pname, values) in _icu_properties:
      prop = _properties[pname]
-      group_index = group_to_int[aliases]
-      if prop[2] and pname != "age":  # Property with named values.
+      aliases = prop[1]
+      if values:  # Property with named values.
        if prop[0] == "Binary": pname = "binprop"
        out_file.write(
-            "    Property((int32_t) %s, %d, VALUES_%s_COUNT, VALUES_%s),\n" %
-            (enum, group_index, pname, pname))
+            '    Property(%s, "%s", VALUES_%s, VALUES_%s_COUNT),\n' %
+            (enum, " ".join(aliases), pname, pname))
      else:
-        out_file.write("    Property((int32_t) %s, %d, 0, NULL),\n" %
-                       (enum, group_index))
+        out_file.write('    Property(%s, "%s", NULL, 0),\n' %
+                       (enum, " ".join(aliases)))
    out_file.write("};\n")

 # main() ------------------------------------------------------------------- ***
@ -1878,10 +1767,13 @@ def main():
  with open(out_path, "w") as out_file:
    WritePreparsedUCD(out_file)
    out_file.flush()
+
  # TODO: PrintNameStats()
+
  # ICU data for property & value names API
-  ParseUScriptHeader(icu_src_root)
  ParseUCharHeader(icu_src_root)
+  ParseUScriptHeader(icu_src_root)
+  CheckPNamesData()
  genprops_path = os.path.join(icu_tools_root, "unicode", "c", "genprops")
  if not os.path.exists(genprops_path): os.makedirs(genprops_path)
  out_path = os.path.join(genprops_path, "pnames_data.h")