// RUN: %target-run-simple-swift
// REQUIRES: executable_test
// REQUIRES: objc_interop

import StdlibUnittest
import NaturalLanguage

// Test suite exercising the Swift API of the NaturalLanguage framework:
// language recognition, tokenization and tagging.
var tests = TestSuite("NaturalLanguage")

if #available(macOS 10.14, iOS 12.0, watchOS 5.0, tvOS 12.0, *) {
  // Checks dominant-language detection and the hypothesis dictionary for a
  // sentence that mixes English and German words.
  tests.test("recognizer") {
    let recognizer = NLLanguageRecognizer()
    let str = "This is a test mein Freund"
    recognizer.processString(str)
    // NOTE(review): the hints are assigned after the string is processed;
    // the assertions below rely on them still being taken into account.
    recognizer.languageHints = [.english: 0.9, .german: 0.1]

    let lang = recognizer.dominantLanguage
    expectEqual(NLLanguage.english, lang)

    // Ask for at most two hypotheses; only the hinted languages are
    // expected to come back with a probability.
    let hypotheses = recognizer.languageHypotheses(withMaximum: 2)
    expectEqual(2, hypotheses.count)

    // A language missing from the dictionary is treated as probability 0.
    let enProb = hypotheses[.english] ?? 0.0
    let deProb = hypotheses[.german] ?? 0.0
    let frProb = hypotheses[.french] ?? 0.0
    expectNotEqual(0.0, enProb)
    expectNotEqual(0.0, deProb)
    expectEqual(0.0, frProb)
  }

  // Checks that the three ways of obtaining word tokens (single lookup,
  // array, enumeration) agree with each other.
  tests.test("tokenizer") {
    let tokenizer = NLTokenizer(unit: .word)
    let str = "This is a test. 😀"
    // NSRange is measured in UTF-16 code units, so the length is taken from
    // the UTF-16 view rather than hard-coded. The conversion cannot fail for
    // a range that spans the whole string, hence the force-unwrap.
    let strRange = Range(NSMakeRange(0, str.utf16.count), in: str)!
    tokenizer.string = str
    tokenizer.setLanguage(.english)

    let tokenRange1 = tokenizer.tokenRange(at: str.startIndex)
    let tokenArray = tokenizer.tokens(for: strRange)
    let tokenRange2 = tokenArray[0]
    expectEqual(tokenRange1, tokenRange2)
    expectEqual("This", str[tokenRange1])
    expectEqual(5, tokenArray.count)

    // Enumeration must visit the same number of tokens as `tokens(for:)`
    // returned, starting with the same first token.
    var numTokens = 0
    tokenizer.enumerateTokens(in: strRange) { (tokenRange, _) -> Bool in
      if numTokens == 0 {
        expectEqual(tokenRange1, tokenRange)
      }
      numTokens += 1
      return true
    }
    expectEqual(5, numTokens)
    expectEqual("😀", str[tokenArray[4]])
  }

  // Checks that single-tag lookup, the tag array and tag enumeration agree
  // with each other for the token-type scheme.
  tests.test("tagger") {
    let tagger = NLTagger(tagSchemes: [.tokenType])
    let str = "This is a test. 😀"
    // Whole-string range, length expressed in UTF-16 code units as NSRange
    // requires; see the force-unwrap rationale in the tokenizer test's
    // identical conversion (a full-string range always converts).
    let strRange = Range(NSMakeRange(0, str.utf16.count), in: str)!
    tagger.string = str
    tagger.setLanguage(.english, range: strRange)
    let ortho = NSOrthography.defaultOrthography(forLanguage: "en")
    tagger.setOrthography(ortho, range: strRange)

    let (tag1, tokenRange1) = tagger.tag(at: str.startIndex, unit: .word, scheme: .tokenType)
    let tags = tagger.tags(in: strRange, unit: .word, scheme: .tokenType, options: .omitWhitespace)
    let (tag2, tokenRange2) = tags[0]
    let tokenRange3 = tagger.tokenRange(at: str.startIndex, unit: .word)
    expectEqual(NLTag.word, tag1)
    expectEqual(NLTag.word, tag2)
    expectEqual(tokenRange1, tokenRange2)
    expectEqual(tokenRange2, tokenRange3)
    expectEqual("This", str[tokenRange1])
    // NOTE(review): one more than the tokenizer's count for the same string;
    // presumably the "." is reported as its own tagged token here - confirm.
    expectEqual(6, tags.count)

    // Every enumerated (tag, range) pair must match what a direct lookup at
    // the start of that range reports.
    var numTokens = 0
    tagger.enumerateTags(in: strRange, unit: .word, scheme: .tokenType, options: .omitWhitespace) { (tag, tokenRange) -> Bool in
      let (tagAt, tokenRangeAt) = tagger.tag(at: tokenRange.lowerBound, unit: .word, scheme: .tokenType)
      expectEqual(tag, tagAt)
      expectEqual(tokenRange, tokenRangeAt)
      if numTokens == 0 {
        expectEqual(NLTag.word, tag)
        expectEqual(tokenRange1, tokenRange)
      }
      numTokens += 1
      return true
    }
    expectEqual(6, numTokens)

    let (_, tokenRange4) = tags[5]
    expectEqual("😀", str[tokenRange4])
  }
}

runAllTests()