%{ # RUN: %empty-directory(%t) && %gyb %s | %FileCheck %s from GYBUnicodeDataUtils import * def test_trie_generation(property_table, configure_generator=None): trie_generator = UnicodeTrieGenerator() if configure_generator is not None: configure_generator(trie_generator) trie_generator.create_tables() trie_generator.fill_from_unicode_property(property_table) trie_generator.verify(property_table) trie_generator.freeze() trie_generator.verify(property_table) trie_generator.serialize(property_table) print(( trie_generator.bmp_first_level_index_bits, trie_generator.bmp_data_offset_bits, trie_generator.supp_first_level_index_bits, trie_generator.supp_second_level_index_bits, trie_generator.supp_data_offset_bits, trie_generator.bmp_lookup_bytes_per_entry, trie_generator.bmp_data_bytes_per_entry, trie_generator.supp_lookup1_bytes_per_entry, trie_generator.supp_lookup2_bytes_per_entry, trie_generator.supp_data_bytes_per_entry, len(trie_generator.trie_bytes), trie_generator.bmp_data_bytes_offset - trie_generator.bmp_lookup_bytes_offset, trie_generator.supp_lookup1_bytes_offset - trie_generator.bmp_data_bytes_offset, trie_generator.supp_lookup2_bytes_offset - trie_generator.supp_lookup1_bytes_offset, trie_generator.supp_data_bytes_offset - trie_generator.supp_lookup2_bytes_offset, len(trie_generator.trie_bytes) - trie_generator.supp_data_bytes_offset)) class PerfectlyCompressibleProperty(UnicodeProperty): def __init__(self): pass def get_default_value(self): return 'Default' def get_value(self, cp): return 'Default' def to_numeric_value(self, value): if value == 'Default': return 42 assert(False) def get_numeric_value(self, cp): return self.to_numeric_value(self.get_value(cp)) print('PerfectlyCompressibleProperty') test_trie_generation(PerfectlyCompressibleProperty()) # CHECK-LABEL: PerfectlyCompressibleProperty # CHECK: (8, 8, 5, 8, 8, 1, 1, 1, 1, 1, 1041, 256, 256, 17, 256, 256) # # Explanation for table sizes above: # # bmp_lookup: 1-byte words x 256 = 256 # bmp_data: 1 x 1 = 256 # supp_lookup1: 1 x 17 = 17 # supp_lookup2: 1 x 1*256 = 256 # supp_data: 1 x 1*256 = 256 class UncompressibleProperty(UnicodeProperty): def __init__(self): pass def get_default_value(self): return 42 def get_value(self, cp): # Split Unicode codespace into 128-entry "pages". Start each page with # a unique sequence of property values (page number) so that the result # cannot be compressed. page_number = cp >> 7 if cp % 0x80 == 1: return page_number & 0xff if cp % 0x80 == 2: return (page_number >> 8) & 0xff if cp % 0x80 == 3: return (page_number >> 16) & 0xff return 42 def to_numeric_value(self, value): return value def get_numeric_value(self, cp): return self.to_numeric_value(self.get_value(cp)) print('UncompressibleProperty, default trie parameters') test_trie_generation(UncompressibleProperty()) # CHECK-LABEL: UncompressibleProperty, default trie parameters # CHECK: (8, 8, 5, 8, 8, 2, 1, 1, 2, 1, 1123601, 512, 65536, 17, 8704, 1048832) # # Explanation for table sizes above: # # bmp_lookup: 2-byte words x 256 = 512 # bmp_data: 1 x 256*256 = 65536 # supp_lookup1: 1 x 17 = 17 # supp_lookup2: 2 x 17*256 = 8704 # supp_data: 1 x (16*256+1)*256 = 1048832 def configure_generator_for_16_bit_indexes(trie_generator): trie_generator.bmp_first_level_index_bits = 9 trie_generator.supp_first_level_index_bits = 10 trie_generator.supp_second_level_index_bits = 2 print('UncompressibleProperty, 16-bit indexes') test_trie_generation(UncompressibleProperty(), configure_generator_for_16_bit_indexes) # CHECK-LABEL: UncompressibleProperty, 16-bit indexes # CHECK: (9, 7, 10, 2, 9, 2, 1, 2, 2, 1, 1120840, 1024, 65536, 1088, 4104, 1049088) # # Explanation for table sizes above: # # bmp_lookup: 2-byte words x 512 = 1024 # bmp_data: 1 x 512*128 = 65536 # supp_lookup1: 2 x 544 = 1088 # supp_lookup2: 2 x 513*4 = 4104 # supp_data: 1 x (2048+1)*512 = 1049088 # gyb will print line markers after our output, so make sure that those # don't accidentally match any other CHECK lines. print('THE END') # CHECK-LABEL: THE END }%