[stdlib] Clarify buffer comments in UTF8._decodeOne()

This commit is contained in:
Patrick Pijnappel
2016-03-22 22:33:13 +11:00
parent 771a81594f
commit ffe864687a

View File

@@ -202,8 +202,9 @@ public struct UTF8 : UnicodeCodec {
@warn_unused_result
public // @testable
static func _decodeOne(buffer: UInt32) -> (result: UInt32?, length: UInt8) {
// Note the buffer is read least significant byte first: [ #3 #2 #1 #0 ].
if buffer & 0x80 == 0 { // 1-byte sequence (ASCII), [ XXX XXX XXX CU0 ].
if buffer & 0x80 == 0 { // 1-byte sequence (ASCII), buffer: [ CU0 ].
let value = buffer & 0xff
return (value, 1)
}
@@ -227,7 +228,7 @@ public struct UTF8 : UnicodeCodec {
let bit1 = (lut1 >> index) & 1
switch (bit1, bit0) {
case (0, 0): // 2-byte sequence, [ XXX XXX CU1 CU0 ].
case (0, 0): // 2-byte sequence, buffer: [ CU1 CU0 ].
// Require 10xx xxxx 110x xxxx.
if _slowPath(buffer & 0xc0e0 != 0x80c0) { return (nil, 1) }
// Disallow xxxx xxxx xxx0 000x (<= 7 bits case).
@@ -237,7 +238,7 @@ public struct UTF8 : UnicodeCodec {
| (buffer & 0x001f) << 6
return (value, 2)
case (0, 1): // 3-byte sequence, [ XXX CU2 CU1 CU0 ].
case (0, 1): // 3-byte sequence, buffer: [ CU2 CU1 CU0 ].
// Disallow xxxx xxxx xx0x xxxx xxxx 0000 (<= 11 bits case).
if _slowPath(buffer & 0x00200f == 0x000000) { return (nil, 1) }
// Disallow xxxx xxxx xx1x xxxx xxxx 1101 (surrogate code points).
@@ -253,7 +254,7 @@ public struct UTF8 : UnicodeCodec {
| (buffer & 0x00000f) << 12
return (value, 3)
case (1, 0): // 4-byte sequence, [ CU3 CU2 CU1 CU0 ].
case (1, 0): // 4-byte sequence, buffer: [ CU3 CU2 CU1 CU0 ].
// Disallow xxxx xxxx xxxx xxxx xx00 xxxx xxxx x000 (<= 16 bits case).
if _slowPath(buffer & 0x00003007 == 0x00000000) { return (nil, 1) }
// If xxxx xxxx xxxx xxxx xxxx xxxx xxxx x1xx.