mirror of
https://github.com/apple/swift.git
synced 2025-12-21 12:14:44 +01:00
703 lines
18 KiB
Swift
703 lines
18 KiB
Swift
//===----------------------------------------------------------------------===//
|
|
// String Type
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// \brief A string of characters, stored as UTF-8 encoded bytes in a
/// StringByteData buffer.
struct String : Hashable, BuiltinStringLiteralConvertible,
                StringLiteralConvertible {
  /// The bytes backing this string.
  var str_value : StringByteData

  /// \brief Build a String over the bytes of a builtin string literal.
  ///
  /// The storage is created from \c value and \c byteSize with a
  /// default-initialized owner, tagged with the literal's ASCII-ness, and
  /// flagged as a C string.
  static func _convertFromBuiltinStringLiteral(value : Builtin.RawPointer,
                                               byteSize : Builtin.Int64,
                                               isASCII : Builtin.Int1) -> String {
    var s : String
    var owner : Builtin.ObjectPointer
    s.str_value = StringByteData.convertFromHeapArray(value, owner, byteSize)
    s.str_value.setASCII(_getBool(isASCII))
    s.str_value.setCString(true)
    return s
  }

  typealias StringLiteralType = String

  /// \brief A string literal of type String converts to itself.
  static func convertFromStringLiteral(value : String) -> String {
    return value
  }

  /// \brief Create a string backed by a default-constructed StringByteData.
  constructor() {
    this.str_value = StringByteData()
  }

  /// \brief Create a string that uses \c str_value as its storage.
  constructor(str_value : StringByteData) {
    this.str_value = str_value
  }

  /// \brief Create a string made of \c sz repetitions of the character \c c.
  constructor(sz : Int, c : Char) {
    // Encode the character once, then copy its bytes sz times.
    var s = String(c)
    str_value = StringByteData.getNew(sz*s.byteLength())
    var k = 0
    for i in 0..sz {
      for j in 0..s.byteLength() {
        str_value[k++] = s.str_value[j]
      }
    }
  }

  /// \brief Return a UInt8 array whose base, length and owner are taken from
  /// this string's storage.
  func asUInt8() -> UInt8[] {
    var r : UInt8[]
    r.base = str_value.base
    r.length = str_value.length
    r.owner = str_value.owner
    return r
  }

  /// \brief The number of bytes (not characters) in the string.
  func byteLength() -> Int {
    return str_value.length
  }

  /// \brief Unless the storage is already flagged as a C string, replace this
  /// string with the result of \c str_value.getCString().
  func _makeNulTerminated() {
    if str_value.isCString() {
      return
    }

    this = str_value.getCString()
  }

  /// \brief Enumerates the characters of a string by decoding its UTF-8 bytes
  /// from front to back.
  ///
  /// The enumerator keeps its own String in \c value; consuming a character
  /// advances that string's base and shrinks its length.
  struct CharEnumeratorType : Enumerable, Enumerator {
    /// The part of the string that has not been enumerated yet.
    var value : String

    typealias EnumeratorType = CharEnumeratorType

    /// \brief The enumerator is its own enumerable.
    func getEnumeratorType() -> CharEnumeratorType {
      return this
    }

    typealias Element = Char

    /// \brief True when no bytes are left to decode.
    func isEmpty() -> Bool {
      return value.isEmpty()
    }

    /// \brief Decode and consume the next character.
    func next() -> Char {
      // string ranges do not end in the middle of a Char
      // one range check is sufficient.
      debugTrap(!isEmpty())
      debugTrap(value.str_value.length > 0)

      var u8 = StringByte(value.str_value.base.get())
      // Fast path: a byte below 0x80 is a complete one-byte character.
      if u8 < 0x80 {
        value.str_value.base += 1
        value.str_value.length -= 1
        return Char(UInt32(u8))
      }

      return _nextSlow(u8)
    }

    /// \brief Decode and consume a two-, three- or four-byte sequence whose
    /// lead byte \c u8 has already been read (but not consumed) by next().
    ///
    /// The continuation bytes are not validated.
    func /*[noinline]*/ _nextSlow(u8 : StringByte) -> Char {
      var u8_1 = StringByte((value.str_value.base + 1).get())
      // Lead byte below 0xE0: two-byte sequence (5 + 6 payload bits).
      if u8 < 0xE0 {
        value.str_value.base += 2
        value.str_value.length -= 2
        return Char((UInt32(u8 & 0x1F) << 6) |
                    UInt32(u8_1 & 0x3F))
      }
      var u8_2 = StringByte((value.str_value.base + 2).get())
      // Lead byte below 0xF0: three-byte sequence (4 + 6 + 6 payload bits).
      if u8 < 0xF0 {
        value.str_value.base += 3
        value.str_value.length -= 3
        return Char((UInt32(u8 & 0x0F) << 12) |
                    (UInt32(u8_1 & 0x3F) << 6) |
                    UInt32(u8_2 & 0x3F))
      }
      // Otherwise: four-byte sequence (3 + 6 + 6 + 6 payload bits).
      var u8_3 = StringByte((value.str_value.base + 3).get())
      value.str_value.base += 4
      value.str_value.length -= 4
      return Char((UInt32(u8 & 0x07) << 18) |
                  (UInt32(u8_1 & 0x3F) << 12) |
                  (UInt32(u8_2 & 0x3F) << 6) |
                  UInt32(u8_3 & 0x3F))
    }
  }

  /// An enumerator over the characters of the string.
  var chars : CharEnumeratorType {
    return CharEnumeratorType(this)
  }

  /// The pieces of the string separated by '\n'.
  var lines : String[] {
    return split('\n')
  }

  /// The byte storage of the string.
  var bytes : StringByteData {
    return str_value
  }

  /// \brief The number of characters in the string.
  ///
  /// For storage flagged as ASCII this is the byte length; otherwise every
  /// character is decoded and counted.
  func size() -> Int {
    if str_value.isASCII() {
      return byteLength()
    }
    var Result = 0
    for i in chars {
      ++Result
    }
    return Result
  }

  /// The number of characters in the string; same as size().
  var length : Int {
    return size()
  }

  /// \brief True when the string has no bytes.
  func isEmpty() -> Bool {
    return byteLength() == 0
  }

  /// \brief Return the substring covering the character indices in \c rng.
  ///
  /// Storage flagged as ASCII is sliced directly by byte offset and keeps the
  /// same owner; anything else goes through _subscriptNonASCII.
  subscript (rng : IntEnumeratorType) -> String {
    var len = rng.max - rng.min
    if str_value.isASCII() {
      debugTrap(UInt(len) <= UInt(byteLength()))
      return String(StringByteData.convertFromHeapArray(
                      (str_value.base + rng.min).value,
                      str_value.owner,
                      len.value))
    }
    return _subscriptNonASCII(rng)
  }

  /// \brief Return the first byte index at or after \c idx that is not a
  /// UTF-8 continuation byte (0x80 ..< 0xC0), or byteLength() if there is none.
  func _skipContinuationBytes(idx : Int) -> Int {
    var i = idx
    while i < byteLength() {
      var b : StringByte = str_value[i]
      if b < 0x80 || b >= 0xC0 {
        break
      }
      ++i
    }
    return i
  }

  /// \brief Slow path of the range subscript: translate the character range
  /// \c rng into a byte range by scanning the UTF-8 bytes.
  ///
  /// Characters are counted by their first byte (a byte below 0x80 or at or
  /// above 0xC0). The result shares this string's owner.
  func _subscriptNonASCII (rng : IntEnumeratorType) -> String {
    var len = rng.max - rng.min
    var start = rng.min
    var idx = 0
    // Skip the first bytes of the `start` characters before the range.
    while start > 0 {
      var tmp : StringByte = str_value[idx++]
      if tmp < 0x80 || tmp >= 0xC0 {
        --start
      }
    }
    // The loop above stops right after a first byte; step over the rest of
    // that character so the slice does not begin on a continuation byte.
    idx = _skipContinuationBytes(idx)
    var oldidx = idx
    // Consume the first bytes of the `len` characters inside the range.
    while len > 0 {
      var tmp = str_value[idx++]
      if tmp < 0x80 || tmp >= 0xC0 {
        --len
      }
    }
    // Include the continuation bytes of the last character in the slice.
    idx = _skipContinuationBytes(idx)
    return String(StringByteData.convertFromHeapArray(
                    (str_value.base + oldidx).value,
                    str_value.owner, (idx - oldidx).value))
  }

  /// \brief Return the character at character index \c idx.
  ///
  /// Storage flagged as ASCII is indexed directly when \c idx is in bounds;
  /// every other case (including an out-of-range index) goes through
  /// subscriptNonASCII.
  subscript (idx : Int) -> Char {
    if str_value.isASCII() {
      // Reject negative indices here so they cannot read before `base`.
      if idx >= 0 && idx < byteLength() {
        return Char(UInt32((str_value.base + idx).get()))
      }
    }
    return subscriptNonASCII(idx)
  }

  /// \brief Return the character at index \c idx by enumerating the
  /// characters; calls alwaysTrap() if the enumeration ends first.
  func subscriptNonASCII(idx : Int) -> Char {
    for c in chars {
      if idx-- == 0 {
        return c
      }
    }
    alwaysTrap()
  }

  /// \brief Print the string between double quotes, letting each character
  /// print its own body via replPrintCharBody().
  func replPrint() {
    print('"')
    for c in chars {
      c.replPrintCharBody()
    }
    print('"')
  }

  // FIXME: Locales make this interesting
  /// A copy of the string with ASCII 'a'-'z' and the lowercase letters
  /// U+00E0-U+00FE (except U+00F7, the division sign) converted to uppercase.
  /// All other bytes are copied unchanged.
  var uppercase : String {
    var len = byteLength()
    var resultArray = StringByteData.getNew(len)
    var i = 0
    while i != len {
      var u8 = str_value[i]
      if u8 < 0x80 {
        // One-byte character: 97 ..< 123 is 'a' through 'z'.
        if (97 .. 123).contains(Int(u8)) {
          resultArray[i] = u8 - 32
        } else {
          resultArray[i] = u8
        }
        i += 1
      } else if u8 < 0xE0 {
        // Two-byte character: with lead byte 0xC3, second bytes
        // 0xA0 ..< 0xBF are U+00E0-U+00FE, whose uppercase forms sit 0x20
        // lower. 0xB7 is U+00F7 (division sign), which has no case.
        resultArray[i] = u8
        var u8_1 = str_value[i + 1]
        if u8 == 0xC3 && (0xA0 .. 0xBF).contains(Int(u8_1)) && u8_1 != 0xB7 {
          resultArray[i+1] = u8_1 - 0x20
        } else {
          resultArray[i+1] = u8_1
        }
        i += 2
      } else if u8 < 0xF0 {
        // Three-byte character: copied unchanged.
        resultArray[i] = u8
        resultArray[i+1] = str_value[i+1]
        resultArray[i+2] = str_value[i+2]
        i += 3
      } else {
        // Four-byte character: copied unchanged.
        resultArray[i] = u8
        resultArray[i+1] = str_value[i+1]
        resultArray[i+2] = str_value[i+2]
        resultArray[i+3] = str_value[i+3]
        i += 4
      }
    }

    var result : String
    result.str_value = resultArray
    return result
  }

  // FIXME: Locales make this interesting
  /// A copy of the string with ASCII 'A'-'Z' and the uppercase letters
  /// U+00C0-U+00DE (except U+00D7, the multiplication sign) converted to
  /// lowercase. All other bytes are copied unchanged.
  var lowercase : String {
    var len = byteLength()
    var resultArray = StringByteData.getNew(len)
    var i = 0
    while i != len {
      var u8 = str_value[i]
      if u8 < 0x80 {
        // One-byte character: 65 ..< 91 is 'A' through 'Z'.
        if (65 .. 91).contains(Int(u8)) {
          resultArray[i] = u8 + 32
        } else {
          resultArray[i] = u8
        }
        i += 1
      } else if u8 < 0xE0 {
        // Two-byte character: with lead byte 0xC3, second bytes
        // 0x80 ..< 0x9F are U+00C0-U+00DE, whose lowercase forms sit 0x20
        // higher. 0x97 is U+00D7 (multiplication sign), which has no case.
        resultArray[i] = u8
        var u8_1 = str_value[i + 1]
        if u8 == 0xC3 && (0x80 .. 0x9F).contains(Int(u8_1)) && u8_1 != 0x97 {
          resultArray[i+1] = u8_1 + 0x20
        } else {
          resultArray[i+1] = u8_1
        }
        i += 2
      } else if u8 < 0xF0 {
        // Three-byte character: copied unchanged.
        resultArray[i] = u8
        resultArray[i+1] = str_value[i+1]
        resultArray[i+2] = str_value[i+2]
        i += 3
      } else {
        // Four-byte character: copied unchanged.
        resultArray[i] = u8
        resultArray[i+1] = str_value[i+1]
        resultArray[i+2] = str_value[i+2]
        resultArray[i+3] = str_value[i+3]
        i += 4
      }
    }

    var result : String
    result.str_value = resultArray
    return result
  }

  /// \brief Create a string holding the single character \c c.
  constructor(c : Char) {
    // NOTE(review): 4 is presumably a capacity for the longest UTF-8 sequence
    // that _append emits -- confirm against StringByteData.
    str_value = StringByteData(4)
    _append(c)
  }

  // Predicates

  // FIXME: Replace this with a generic isAll().
  /// \brief True when \c predicate holds for every character of the string
  /// (and therefore also for the empty string).
  func _isAll(predicate : (Char) -> Bool) -> Bool {
    for c in chars { if !predicate(c) { return false } }

    return true
  }

  /// \brief True when the string begins with the characters of \c prefix.
  func startsWith(prefix : String) -> Bool {
    if prefix.size() > size() { return false }

    return this[0..prefix.size()] == prefix
  }

  /// \brief True when every character satisfies Char.isAlpha().
  func isAlpha() -> Bool { return _isAll({ $0.isAlpha() }) }
  /// \brief True when every character satisfies Char.isDigit().
  func isDigit() -> Bool { return _isAll({ $0.isDigit() }) }
  /// \brief True when every character satisfies Char.isSpace().
  func isSpace() -> Bool { return _isAll({ $0.isSpace() }) }
}
|
|
|
|
extension String : FormattedPrintable {
  /// \brief Format the string according to \c layout.
  ///
  /// A \c kind of 'u' formats the uppercased string, 'l' the lowercased
  /// string, and any other kind the string as it is.
  func format(kind : Char, layout : String) -> String {
    if kind == 'u' {
      var upper = uppercase
      return Format(layout).printToString(upper)
    }
    if kind == 'l' {
      var lower = lowercase
      return Format(layout).printToString(lower)
    }
    var unchanged = this
    return Format(layout).printToString(unchanged)
  }
}
|
|
|
|
// StringLiteralType specifies the default type to use for a string literal
|
|
// when the type isn't otherwise constrained.
|
|
typealias StringLiteralType = String
|
|
|
|
|
|
// Concatenation.
|
|
extension String {
  /// \brief Append \c count bytes starting at \c bytes to the storage.
  func _append(bytes: UnsafePointer<UInt8>, count: Int) {
    str_value.appendBytes(bytes, count)
  }

  /// \brief Append the character \c c_ to the storage as a UTF-8 sequence of
  /// one to four bytes.
  ///
  /// The sequence is built back to front in a four-byte tuple, so the bytes
  /// to append are always the last \c byteCount elements of the tuple.
  func _append(c_: Char) {
    var scalar = UInt32(c_)
    var encoded: (UInt8,UInt8,UInt8,UInt8)
    var byteCount = 1

    // Assume a one-byte character; each nested step below turns the
    // previously written byte into a continuation byte and adds one more
    // byte in front of it.
    encoded.3 = UInt8(scalar)
    if scalar >= UInt32(1<<7) {
      scalar >>= 6
      byteCount = 2
      encoded.3 = (encoded.3 & 0x3F) | 0x80 // 10xxxxxx
      encoded.2 = UInt8(scalar)
      if scalar >= UInt32(1<<5) {
        scalar >>= 6
        byteCount = 3
        encoded.2 = (encoded.2 & 0x3F) | 0x80 // 10xxxxxx
        encoded.1 = UInt8(scalar)
        if scalar >= UInt32(1<<4) {
          scalar >>= 6
          byteCount = 4
          encoded.1 = (encoded.1 & 0x3F) | 0x80 // 10xxxxxx
          encoded.0 = UInt8(scalar | 0xF0) // 11110xxx
        }
        else {
          encoded.1 |= 0xE0 // 1110xxxx
        }
      }
      else {
        encoded.2 |= 0xC0 // 110xxxxx
      }
    }

    str_value.appendBytes(UnsafePointer.addressOf(&encoded.0) + 4 - byteCount, byteCount)
  }
}
|
|
|
|
/// \brief Concatenate two strings.
///
/// An empty \c lhs yields \c rhs itself; otherwise the bytes of \c rhs are
/// appended to \c lhs, which is then returned.
func +(lhs : String, rhs : String) -> String {
  if (!lhs.isEmpty()) {
    // FIXME: Consider further optimizations here, like using rhs as the
    // result if it's uniquely referenced and has the capacity
    lhs._append(rhs.str_value.base, rhs.str_value.length)
    return lhs
  }
  return rhs
}
|
|
/// \brief Concatenate a string and a character: append \c rhs to \c lhs and
/// return \c lhs.
func +(lhs : String, rhs : Char) -> String {
|
|
lhs._append(rhs)
|
|
return lhs
|
|
}
|
|
/// \brief Concatenate a character and a string.
///
/// A new string is created with StringByteData(byte length of \c rhs + 4),
/// then \c lhs and the bytes of \c rhs are appended to it in that order.
func +(lhs : Char, rhs : String) -> String {
  var rhsByteCount = rhs.byteLength()
  var joined = String(StringByteData(rhsByteCount + 4))
  joined._append(lhs)
  joined._append(rhs.str_value.base, rhsByteCount)
  return joined
}
|
|
/// \brief Concatenate two characters into a new string: \c lhs followed by
/// \c rhs.
func +(lhs : Char, rhs : Char) -> String {
|
|
// NOTE(review): 8 is presumably room for two UTF-8 sequences of up to four
// bytes each -- confirm against StringByteData.
var result = String(StringByteData(8))
|
|
result._append(lhs)
|
|
result._append(rhs)
|
|
return result
|
|
}
|
|
|
|
|
|
// String append
|
|
/// \brief Append \c rhs to \c lhs in place.
///
/// An empty \c lhs is simply replaced by \c rhs; otherwise the bytes of
/// \c rhs are appended to the storage of \c lhs.
func [assignment] += (lhs : [byref] String, rhs : String) {
  if (!lhs.isEmpty()) {
    lhs._append(rhs.str_value.base, rhs.str_value.length)
  }
  else {
    lhs = rhs
  }
}
|
|
|
|
/// \brief Append the character \c rhs to \c lhs in place.
func [assignment] += (lhs : [byref] String, rhs : Char) {
|
|
lhs._append(rhs)
|
|
}
|
|
|
|
|
|
// Comparison operators
|
|
extension String : Comparable {
  /// \brief Byte-wise equality.
  ///
  /// Strings of different byte length are unequal; strings of equal length
  /// that share the same base pointer are equal without looking at the bytes.
  func __equal__(rhs: String) -> Bool {
    var count = byteLength()
    if count != rhs.str_value.length {
      return false
    }
    if str_value.base == rhs.str_value.base {
      return true
    }
    var pos = 0
    while pos < count {
      if str_value[pos] != rhs.str_value[pos] {
        return false
      }
      ++pos
    }
    return true
  }

  /// \brief Byte-wise lexicographic "less than".
  ///
  /// This string is less than \c rhs when it is a proper prefix of \c rhs or
  /// when its byte at the first differing position is the smaller one.
  func __less__(rhs: String) -> Bool {
    var pos = 0
    var ownCount = byteLength()
    for other in rhs.str_value {
      // This string ran out first: it is a proper prefix of rhs.
      if pos == ownCount {
        return true
      }
      var mine = str_value[pos]
      if mine < other {
        return true
      }
      if other < mine {
        return false
      }
      ++pos
    }
    // rhs ran out first, or both strings are equal.
    return false
  }
}
|
|
|
|
// Conversions to string from other types.
|
|
extension String {
  /// \brief Create the textual form of a signed 128-bit integer.
  ///
  /// The digits are produced by c_print_int into a 128-byte scratch buffer;
  /// the string keeps the first bytes of it, as many as c_print_int reports.
  // NOTE(review): a negative value in radix 2 could need more than 128
  // characters -- confirm that c_print_int honours the size it is given.
  constructor(v : Int128, radix : Int = 10, uppercase : Bool = false) {
    var scratch = StringByteData.getNew(128)
    var written = c_print_int(scratch.base.value, 128, v, radix, uppercase)
    str_value = scratch[0..Int(written)]
  }

  /// \brief Create the textual form of an unsigned 128-bit integer, using
  /// c_print_uint and a 128-byte scratch buffer.
  constructor(v : UInt128, radix : Int = 10, uppercase : Bool = false) {
    var scratch = StringByteData.getNew(128)
    var written = c_print_uint(scratch.base.value, 128, v, radix, uppercase)
    str_value = scratch[0..Int(written)]
  }

  // The narrower signed types are widened to Int128 and formatted by the
  // Int128 constructor.
  constructor(v : Int8, radix : Int = 10, uppercase : Bool = false) {
    this = String(Int128(v), radix, uppercase)
  }
  constructor(v : Int16, radix : Int = 10, uppercase : Bool = false) {
    this = String(Int128(v), radix, uppercase)
  }
  constructor(v : Int32, radix : Int = 10, uppercase : Bool = false) {
    this = String(Int128(v), radix, uppercase)
  }
  constructor(v : Int64, radix : Int = 10, uppercase : Bool = false) {
    this = String(Int128(v), radix, uppercase)
  }

  // The narrower unsigned types are widened to UInt128 and formatted by the
  // UInt128 constructor.
  constructor(v : UInt8, radix : Int = 10, uppercase : Bool = false) {
    this = String(UInt128(v), radix, uppercase)
  }
  constructor(v : UInt16, radix : Int = 10, uppercase : Bool = false) {
    this = String(UInt128(v), radix, uppercase)
  }
  constructor(v : UInt32, radix : Int = 10, uppercase : Bool = false) {
    this = String(UInt128(v), radix, uppercase)
  }
  constructor(v : UInt64, radix : Int = 10, uppercase : Bool = false) {
    this = String(UInt128(v), radix, uppercase)
  }

  /// \brief Create the textual form of a Double, using c_print_double and a
  /// 256-byte scratch buffer.
  constructor(v : Double) {
    var scratch = StringByteData.getNew(256)
    var written = c_print_double(scratch.base.value, v)
    this = String(scratch[0..Int(written)])
  }

  /// \brief Create the textual form of a Float by widening it to Double.
  constructor(v : Float) {
    this = String(Double(v))
  }

  /// \brief Create "true" or "false" from a Bool.
  constructor(b : Bool) {
    if !b {
      this = "false"
    } else {
      this = "true"
    }
  }
}
|
|
|
|
// Conversions from string to other types.
|
|
extension String {
  /// \brief If the string represents an integer that fits into an Int, returns
  /// the corresponding integer.
  ///
  /// The accepted form is an optional leading '+' or '-' followed by at least
  /// one digit. Returns None for an empty string, for a sign with no digits
  /// after it, when any other character is not a digit, and when the value
  /// does not fit into an Int.
  func toInt() -> Optional<Int> {
    // Computed once: `length` decodes the whole string when it is not ASCII.
    var len = length
    if len <= 0 {
      return None
    }

    // Interpret '+' or '-' before the number.
    var startWithIndex = 0
    // The digits are accumulated as a negative number below, so this factor
    // is -1 for a positive result and 1 for a negative one.
    var negativeFactor = -1
    var firstC = this[0]
    if (firstC == '+') {
      startWithIndex = 1
    } else if (firstC == '-') {
      startWithIndex = 1
      negativeFactor = 1
    }

    // A sign with no digits after it is not a number.
    if startWithIndex == len {
      return None
    }

    // Interpret the string as an integer.
    // Since Int.min() has a larger absolute value, perform addition with
    // negative numbers; detect underflows before they happen.
    var res : Int = 0
    for c in this[startWithIndex..len].chars {
      if !c.isDigit() {
        // Conversion failed if a non-digit is encountered.
        return None
      }

      // Underflow occurs if res * 10 < Int.min().
      if res < Int.min() / 10 {
        return None
      }
      res = res * 10

      var d : Int = (c - '0')
      // Underflow occurs if res - d < Int.min().
      if res < Int.min() + d {
        return None
      }
      res = res - d
    }

    // If res is Int.min() and the result should be positive, the next
    // operation will overflow.
    if negativeFactor == -1 && res == Int.min() {
      return None
    }

    return Optional(res * negativeFactor)
  }
}
|
|
|
|
extension String {
  /// \brief Produce a substring of the given string from the given character
  /// index to the end of the string.
  func substr(start : Int) -> String {
    // Step an enumerator over the first `start` characters.
    var cursor = chars
    while start-- > 0 {
      cursor.next()
    }
    // The distance the enumerator moved is the number of bytes to drop.
    var skipped = cursor.value.str_value.base - str_value.base
    var tail = this
    tail.str_value.base += skipped
    tail.str_value.length -= skipped
    return tail
  }

  /// \brief Split the given string at the given delimiter character, returning
  /// the strings before and after that character (neither includes the
  /// character found) and a boolean value indicating whether the delimiter was
  /// found.
  ///
  /// When the delimiter does not occur, the result is the whole string, an
  /// empty string and false.
  func splitFirst(delim : Char)
    -> (before: String, after: String, wasFound : Bool)
  {
    var cursor = chars
    while !cursor.isEmpty() {
      // Remember where the character about to be read starts.
      var atDelim = cursor
      if cursor.next() == delim {
        // Everything in front of the delimiter; it no longer ends where the
        // original storage ends, so the C-string flag is cleared.
        var head = this
        head.str_value.length = atDelim.value.str_value.base - str_value.base
        head.str_value.setASCII(str_value.isASCII())
        head.str_value.setCString(false)

        // Everything behind the delimiter.
        var consumed = cursor.value.str_value.base - str_value.base
        var tail = this
        tail.str_value.base += consumed
        tail.str_value.length -= consumed
        return (head, tail, true)
      }
    }
    return (this, "", false)
  }

  /// \brief Split the given string at the first character for which the given
  /// predicate returns true. Returns the string before that character, the
  /// character that matches, the string after that character, and a boolean
  /// value indicating whether any character was found.
  ///
  /// When no character matches, the result is the whole string, Char(0), an
  /// empty string and false.
  func splitFirstIf(pred : (Char) -> Bool)
    -> (before: String, found: Char, after : String, wasFound: Bool)
  {
    var cursor = chars
    while !cursor.isEmpty() {
      // Remember where the character about to be read starts.
      var atMatch = cursor
      var candidate = cursor.next()
      if pred(candidate) {
        // Everything in front of the match; it no longer ends where the
        // original storage ends, so the C-string flag is cleared.
        var head = this
        head.str_value.length = atMatch.value.str_value.base - str_value.base
        head.str_value.setASCII(str_value.isASCII())
        head.str_value.setCString(false)

        // Everything behind the match.
        var consumed = cursor.value.str_value.base - str_value.base
        var tail = this
        tail.str_value.base += consumed
        tail.str_value.length -= consumed
        return (head, candidate, tail, true)
      }
    }
    return (this, Char(0), "", false)
  }

  /// \brief Split the given string at each occurrence of the delimiter
  /// character, returning an array of the strings that lie before, between
  /// and after the delimiters.
  func split(arg : Char) -> String[] {
    // Collect the pieces in a vector; the last piece is the one for which no
    // further delimiter was found.
    var pieces = Vector<String>()
    var rest = this
    var part = rest.splitFirst(arg)
    pieces.append(part.before)
    while part.wasFound {
      rest = part.after
      part = rest.splitFirst(arg)
      pieces.append(part.before)
    }

    // Convert the vector to an array.
    //
    // FIXME: Remove this.
    var count = pieces.length
    var piecesArray = new String[count]
    for var n = 0; n < count; ++n {
      piecesArray[n] = pieces[n]
    }

    return piecesArray
  }

  /// \brief Split the given string at each occurrence of a character for which
  /// the given predicate evaluates true, returning an array of the strings
  /// that lie before, between and after those delimiters.
  func splitIf(pred : (Char) -> Bool) -> String[] {
    // Collect the pieces in a vector; the last piece is the one for which no
    // further delimiter was found.
    var pieces = Vector<String>()
    var rest = this
    var part = rest.splitFirstIf(pred)
    pieces.append(part.before)
    while part.wasFound {
      rest = part.after
      part = rest.splitFirstIf(pred)
      pieces.append(part.before)
    }

    // Convert the vector to an array.
    //
    // FIXME: Remove this.
    var count = pieces.length
    var piecesArray = new String[count]
    for var n = 0; n < count; ++n {
      piecesArray[n] = pieces[n]
    }

    return piecesArray
  }
}
|
|
|
|
extension String {
  /// \brief Hash the bytes of the string.
  ///
  /// Starting from 5381, every byte updates the hash to
  /// (hash << 5) + hash + byte.
  // NOTE(review): whether the additions and the final Int(...) conversion can
  // trap on overflow depends on UInt/Int semantics not visible here -- confirm.
  func hashValue() -> Int {
    var hash : UInt = 5381
    for b in str_value {
      var shifted = hash << 5
      hash = (shifted + hash) + UInt(b)
    }
    return Int(hash)
  }
}
|
|
|
|
extension String : StringInterpolationConvertible {
  /// \brief Build the value of an interpolated string by concatenating its
  /// segments, given in \c strings, in order.
  static func convertFromStringInterpolation(strings : String...) -> String {
    var joined : String
    for segment in strings {
      joined += segment
    }
    return joined
  }
}
|