import TestsUtils

/// Benchmark entry reported as "CSVParsing2"; runs `run_CSVParsing` with
/// `buildWorkload()` as its set-up function.
public let CSVParsing = BenchmarkInfo(
    name: "CSVParsing2",
    runFunction: run_CSVParsing,
    tags: [.miniapplication, .api, .String],
    setUpFunction: { buildWorkload() },
    tearDownFunction: nil)

/// Benchmark entry reported as "CSVParsingAlt2"; runs `run_CSVParsingAlt` with
/// `buildWorkload()` as its set-up function.
public let CSVParsingAlt = BenchmarkInfo(
    name: "CSVParsingAlt2",
    runFunction: run_CSVParsingAlt,
    tags: [.miniapplication, .api, .String],
    setUpFunction: { buildWorkload() },
    tearDownFunction: nil)

/// Benchmark entry reported as "CSVParsingAltIndices2"; runs
/// `run_CSVParsingAltIndices` with `buildWorkload()` as its set-up function.
public let CSVParsingAltIndices = BenchmarkInfo(
    name: "CSVParsingAltIndices2",
    runFunction: run_CSVParsingAltIndices,
    tags: [.miniapplication, .api, .String],
    setUpFunction: { buildWorkload() },
    tearDownFunction: nil)

/// Error thrown by the field/line parsers; `message` describes what was expected.
struct ParseError: Error {
    var message: String
}

// UTF-16 code units of the characters the parsers dispatch on.
// The force-unwraps are safe: each literal is a non-empty, single-character string.
let comma = ",".utf16.first!
let newline = "\n".utf16.first!
// Must be "\r": when this was built from "\n" it was identical to `newline`,
// so carriage returns were never recognised as line terminators.
let carriageReturn = "\r".utf16.first!
let quote = "\"".utf16.first!

/// Consumes a quoted field from `remainder`. The caller must already have removed
/// the opening quote.
///
/// Text up to each following `"` is appended to the result. What comes right after
/// that quote decides what happens next:
/// - another `"`: an escaped quote; a single `"` is appended and parsing continues;
/// - a comma: the field separator; it is consumed and the field is returned;
/// - anything else, or end of input: the field is returned and `remainder` is left
///   positioned at that character.
///
/// - Parameter remainder: The unparsed input; advanced past the consumed text.
/// - Returns: The unescaped field contents (this function never returns `nil`).
/// - Throws: `ParseError` if no closing quote can be found.
func parseQuotedField(_ remainder: inout Substring) throws -> Substring? {
    var result: Substring = "" // we accumulate the result

    while !remainder.isEmpty {
        guard let nextQuoteIndex = remainder.firstIndex(of: "\"") else {
            throw ParseError(message: "Expected a closing \"")
        }

        // Append until the next quote
        result += remainder.prefix(upTo: nextQuoteIndex)
        remainder.remove(upToAndIncluding: nextQuoteIndex)

        guard let peek = remainder.utf16.first else {
            // End of the string
            return result
        }

        switch peek {
        case quote: // two quotes after each other is an escaped quote
            remainder.removeFirst()
            result.append("\"")
        case comma: // field ending
            remainder.removeFirst()
            return result
        default:
            return result
        }
    }

    // Reached only when the input ran out while still inside the quoted field.
    throw ParseError(message: "Expected a closing quote")
}

/// Consume a single field from `remainder`.
///
/// - Parameter remainder: The unparsed input; advanced past the field and, if
///   present, the comma that terminates it. A line terminator is NOT consumed.
/// - Returns: The field, or `nil` when `remainder` is empty or starts with a
///   newline / carriage return (i.e. there are no more fields on this line).
/// - Throws: `ParseError` from `parseQuotedField` for an unterminated quoted field.
func parseField(_ remainder: inout Substring) throws -> Substring? {
    guard let start = remainder.utf16.first else { return nil }

    switch start {
    case quote:
        remainder.removeFirst() // remove the first quote
        return try parseQuotedField(&remainder)
    case newline, carriageReturn:
        // End of the line; `parseLine` consumes the terminator(s).
        return nil
    default:
        // This is the most common case and should ideally be super fast...
        var index = remainder.utf16.startIndex
        while index < remainder.utf16.endIndex {
            switch remainder.utf16[index] {
            case comma:
                // Compute the field first, then drop it together with the comma.
                defer { remainder.remove(upToAndIncluding: index) }
                return remainder.prefix(upTo: index)
            case newline, carriageReturn:
                // Leave the terminator in place so the caller can detect end-of-line.
                let result = remainder.prefix(upTo: index)
                remainder.remove(upTo: index)
                return result
            default:
                remainder.utf16.formIndex(after: &index)
            }
        }

        // No separator found: the rest of the input is the final field.
        let result = remainder
        remainder.removeAll()
        return result
    }
}

extension Substring {
    /// Drops everything before `index`; the element at `index` becomes the first one.
    mutating func remove(upTo index: Index) {
        self = suffix(from: index)
    }

    /// Drops everything up to and including the element at `index`.
    mutating func remove(upToAndIncluding index: Index) {
        self = suffix(from: self.index(after: index))
    }
}

/// Consume a single line from `remainder`.
///
/// Fields are read with `parseField` until it returns `nil`; each one is handed to
/// `processField` together with its zero-based position on the line. Afterwards any
/// run of newline / carriage-return code units is skipped.
///
/// - Parameters:
///   - remainder: The unparsed input; advanced past the line and its terminators.
///   - result: Caller-owned accumulator passed through to `processField`.
///     NOTE(review): `State` is not declared in this part of the file.
///   - processField: Called once per field as `(state, fieldNumber, field)`.
/// - Returns: `true` if at least one field was parsed and input remains afterwards.
/// - Throws: `ParseError` if the fields are followed by something other than a
///   newline or carriage return, or if a quoted field is unterminated.
func parseLine(_ remainder: inout Substring, result: inout State, processField: (inout State, Int, Substring) -> ()) throws -> Bool {
    var fieldNumber = 0

    while let field = try parseField(&remainder) {
        processField(&result, fieldNumber, field)
        fieldNumber += 1
    }

    if !remainder.isEmpty {
        let next = remainder.utf16[remainder.utf16.startIndex]
        guard next == carriageReturn || next == newline else {
            throw ParseError(message: "Expected a newline or CR, got \(next)")
        }

        // Skip the whole run of line terminators (this also skips blank lines).
        while let x = remainder.utf16.first, x == carriageReturn || x == newline {
            remainder.utf16.removeFirst()
        }
    }

    return !remainder.isEmpty && fieldNumber > 0
}

extension String {
    /// Splits the string into rows of fields by walking its unicode scalars.
    ///
    /// A `,` outside quotes ends the current field and a `"\n"` outside quotes ends
    /// the current row. Quote characters toggle the in-quotes state and are kept in
    /// the field text (no unescaping is performed). The field in progress at the end
    /// of input is always flushed, so the last row always has at least one field.
    ///
    /// - Returns: One `[String]` per row.
    func parseAlt() -> [[String]] {
        var result: [[String]] = [[]]
        var currentField = "".unicodeScalars
        var inQuotes = false

        // Appends the field collected so far to the last row and starts a new one.
        func flush() {
            result[result.endIndex-1].append(String(currentField))
            currentField.removeAll()
        }

        for c in self.unicodeScalars {
            switch (c, inQuotes) {
            case (",", false):
                flush()
            case ("\n", false):
                flush()
                result.append([])
            case ("\"", _):
                inQuotes = !inQuotes
                currentField.append(c)
            default:
                currentField.append(c)
            }
        }
        flush()
        return result
    }

    func parseAltIndices() -> [[Substring]] {
        var result: [[Substring]] = [[]]
        var fieldStart = self.startIndex
        var inQuotes = false

        func flush(endingAt end: Index) {
            result[result.endIndex-1].append(self[fieldStart..