Merge remote-tracking branch 'origin/master' into swift-3-api-guidelines

This commit is contained in:
Max Moiseev
2016-03-03 13:22:03 -08:00
231 changed files with 8374 additions and 3064 deletions

View File

@@ -205,16 +205,16 @@ class GraphemeClusterBreakPropertyTable(UnicodeProperty):
class UnicodeTrieGenerator(object):
# Note: if you change any of these parameters, don't forget to update the
# ASCII art above.
BMP_first_level_index_bits = 8
bmp_first_level_index_bits = 8
supp_first_level_index_bits = 5
supp_second_level_index_bits = 8
def get_bmp_first_level_index(self, cp):
return cp >> self.BMP_data_offset_bits
return cp >> self.bmp_data_offset_bits
def get_bmp_data_offset(self, cp):
return cp & ((1 << self.BMP_data_offset_bits) - 1)
return cp & ((1 << self.bmp_data_offset_bits) - 1)
def get_supp_first_level_index(self, cp):
return cp >> (self.supp_second_level_index_bits + self.supp_data_offset_bits)
@@ -235,7 +235,7 @@ class UnicodeTrieGenerator(object):
Don't change parameter values after calling this method.
"""
self.BMP_data_offset_bits = 16 - self.BMP_first_level_index_bits
self.bmp_data_offset_bits = 16 - self.bmp_first_level_index_bits
self.supp_data_offset_bits = \
21 - self.supp_first_level_index_bits - \
@@ -250,12 +250,12 @@ class UnicodeTrieGenerator(object):
self.supp_data_offset_bits)
# A mapping from BMP first-level index to BMP data block index.
self.BMP_lookup = [i for i in range(0, 1 << self.BMP_first_level_index_bits)]
self.bmp_lookup = [i for i in range(0, 1 << self.bmp_first_level_index_bits)]
# An array of BMP data blocks.
self.BMP_data = [
[-1 for i in range(0, 1 << self.BMP_data_offset_bits)]
for i in range(0, 1 << self.BMP_first_level_index_bits)
self.bmp_data = [
[-1 for i in range(0, 1 << self.bmp_data_offset_bits)]
for i in range(0, 1 << self.bmp_first_level_index_bits)
]
# A mapping from supp first-level index to an index of the second-level
@@ -277,9 +277,9 @@ class UnicodeTrieGenerator(object):
]
def splat(self, value):
for i in range(0, len(self.BMP_data)):
for j in range(0, len(self.BMP_data[i])):
self.BMP_data[i][j] = value
for i in range(0, len(self.bmp_data)):
for j in range(0, len(self.bmp_data[i])):
self.bmp_data[i][j] = value
for i in range(0, len(self.supp_data)):
for j in range(0, len(self.supp_data[i])):
@@ -287,8 +287,8 @@ class UnicodeTrieGenerator(object):
def set_value(self, cp, value):
if cp <= 0xffff:
data_block_index = self.BMP_lookup[self.get_bmp_first_level_index(cp)]
self.BMP_data[data_block_index][self.get_bmp_data_offset(cp)] = value
data_block_index = self.bmp_lookup[self.get_bmp_first_level_index(cp)]
self.bmp_data[data_block_index][self.get_bmp_data_offset(cp)] = value
else:
second_lookup_index = self.supp_lookup1[self.get_supp_first_level_index(cp)]
data_block_index = self.supp_lookup2[second_lookup_index][self.get_supp_second_level_index(cp)]
@@ -296,8 +296,8 @@ class UnicodeTrieGenerator(object):
def get_value(self, cp):
if cp <= 0xffff:
data_block_index = self.BMP_lookup[self.get_bmp_first_level_index(cp)]
return self.BMP_data[data_block_index][self.get_bmp_data_offset(cp)]
data_block_index = self.bmp_lookup[self.get_bmp_first_level_index(cp)]
return self.bmp_data[data_block_index][self.get_bmp_data_offset(cp)]
else:
second_lookup_index = self.supp_lookup1[self.get_supp_first_level_index(cp)]
data_block_index = self.supp_lookup2[second_lookup_index][self.get_supp_second_level_index(cp)]
@@ -310,9 +310,9 @@ class UnicodeTrieGenerator(object):
def verify(self, unicode_property):
for cp in range(0, 0x110000):
expectedValue = unicode_property.get_value(cp)
actualValue = self.get_value(cp)
assert(expectedValue == actualValue)
expected_value = unicode_property.get_value(cp)
actual_value = self.get_value(cp)
assert(expected_value == actual_value)
def freeze(self):
"""Compress internal trie representation.
@@ -333,17 +333,17 @@ class UnicodeTrieGenerator(object):
# result of the `map` is explicitly converted to a `list`.
return list(map(map_index, indexes))
# If self.BMP_data contains identical data blocks, keep the first one,
# remove duplicates and change the indexes in self.BMP_lookup to point to
# If self.bmp_data contains identical data blocks, keep the first one,
# remove duplicates and change the indexes in self.bmp_lookup to point to
# the first one.
i = 0
while i < len(self.BMP_data):
while i < len(self.bmp_data):
j = i + 1
while j < len(self.BMP_data):
if self.BMP_data[i] == self.BMP_data[j]:
self.BMP_data.pop(j)
self.BMP_lookup = \
remap_indexes(self.BMP_lookup, old_idx=j, new_idx=i)
while j < len(self.bmp_data):
if self.bmp_data[i] == self.bmp_data[j]:
self.bmp_data.pop(j)
self.bmp_lookup = \
remap_indexes(self.bmp_lookup, old_idx=j, new_idx=i)
else:
j += 1
i += 1
@@ -395,17 +395,17 @@ class UnicodeTrieGenerator(object):
for byte in self._int_to_le_bytes(elt, width)]
def serialize(self, unicode_property):
self.BMP_lookup_bytes_per_entry = 1 if len(self.BMP_data) < 256 else 2
self.BMP_data_bytes_per_entry = 1
self.bmp_lookup_bytes_per_entry = 1 if len(self.bmp_data) < 256 else 2
self.bmp_data_bytes_per_entry = 1
self.supp_lookup1_bytes_per_entry = 1 if len(self.supp_lookup2) < 256 else 2
self.supp_lookup2_bytes_per_entry = 1 if len(self.supp_data) < 256 else 2
self.supp_data_bytes_per_entry = 1
BMP_lookup_words = list(self.BMP_lookup)
BMP_data_words = [
bmp_lookup_words = list(self.bmp_lookup)
bmp_data_words = [
unicode_property.to_numeric_value(elt)
for block in self.BMP_data
for block in self.bmp_data
for elt in block]
supp_lookup1_words = list(self.supp_lookup1)
@@ -415,10 +415,10 @@ class UnicodeTrieGenerator(object):
for block in self.supp_data
for elt in block]
BMP_lookup_bytes = self._int_list_to_le_bytes(
BMP_lookup_words, self.BMP_lookup_bytes_per_entry)
BMP_data_bytes = self._int_list_to_le_bytes(
BMP_data_words, self.BMP_data_bytes_per_entry)
bmp_lookup_bytes = self._int_list_to_le_bytes(
bmp_lookup_words, self.bmp_lookup_bytes_per_entry)
bmp_data_bytes = self._int_list_to_le_bytes(
bmp_data_words, self.bmp_data_bytes_per_entry)
supp_lookup1_bytes = self._int_list_to_le_bytes(
supp_lookup1_words, self.supp_lookup1_bytes_per_entry)
@@ -429,11 +429,11 @@ class UnicodeTrieGenerator(object):
self.trie_bytes = []
self.BMP_lookup_bytes_offset = 0
self.trie_bytes += BMP_lookup_bytes
self.bmp_lookup_bytes_offset = 0
self.trie_bytes += bmp_lookup_bytes
self.BMP_data_bytes_offset = len(self.trie_bytes)
self.trie_bytes += BMP_data_bytes
self.bmp_data_bytes_offset = len(self.trie_bytes)
self.trie_bytes += bmp_data_bytes
self.supp_lookup1_bytes_offset = len(self.trie_bytes)
self.trie_bytes += supp_lookup1_bytes
@@ -478,9 +478,9 @@ def get_extended_grapheme_cluster_rules_matrix(grapheme_cluster_break_property_t
dict.fromkeys(any_value, None)
# Iterate over rules in the order of increasing priority.
for firstList, action, secondList in reversed(rules):
for first in firstList:
for second in secondList:
for first_list, action, second_list in reversed(rules):
for first in first_list:
for second in second_list:
rules_matrix[first][second] = action
# Make sure we can pack one row of the matrix into a 'uint16_t'.
@@ -532,10 +532,10 @@ def get_grapheme_cluster_break_tests_as_utf8(grapheme_break_test_file_name):
if code_point >= 0xd800 and code_point <= 0xdfff:
code_point = 0x200b
code_point = (b'\U%(cp)08x' % {b'cp': code_point}).decode('unicode_escape', 'strict')
as_UTF8_bytes = bytearray(code_point.encode('utf8', 'strict'))
as_UTF8_escaped = ''.join(['\\x%(byte)02x' % {'byte': byte} for byte in as_UTF8_bytes])
test += as_UTF8_escaped
curr_bytes += len(as_UTF8_bytes)
as_utf8_bytes = bytearray(code_point.encode('utf8', 'strict'))
as_utf8_escaped = ''.join(['\\x%(byte)02x' % {'byte': byte} for byte in as_utf8_bytes])
test += as_utf8_escaped
curr_bytes += len(as_utf8_bytes)
return (test, boundaries)