Files
swift-mirror/test/stdlib/SIMDUnsignedInitializers.swift.gyb
Stephen Canon 592d72bba9 Concrete SIMD.init(repeating:) and SIMD.init(lowHalf:highHalf:) optimizations (#81766)
WIP to add more overloads to optimize SIMD codegen on concrete types.
Here we do:

- init(repeating:)
- init(lowHalf:highHalf:)

These are always inlined, even in debug, since LLVM knows how to lower
them to one or two instructions on the targets that we care about.
2025-05-27 15:15:13 -04:00

84 lines
3.2 KiB
Swift

//===--- SIMDUnsignedInitializers.swift.gyb -------------------*- swift -*-===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2025 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
// RUN: %empty-directory(%t)
// RUN: %gyb %s -o %t/SIMDUnsignedInitializers.swift
// RUN: %target-swift-frontend -primary-file %t/SIMDUnsignedInitializers.swift -S | %FileCheck %t/SIMDUnsignedInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKOnone-%target-cpu
// RUN: %target-swift-frontend -primary-file %t/SIMDUnsignedInitializers.swift -S -O | %FileCheck %t/SIMDUnsignedInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKO-%target-cpu
import Swift
// For every unsigned element width (8/16/32/64 bits) at both 64- and 128-bit
// total vector sizes, generate a function that splats one scalar across all
// lanes via SIMD.init(repeating:). The degenerate 1-lane case is skipped.
// At -O on arm64 each splat is expected to lower to a single `dup` plus `ret`;
// at -Onone the same instructions may appear with extra code in between.
%for bits in [8,16,32,64]:
% for totalBits in [64,128]:
% n = totalBits // bits
% if n != 1:
% neonSuffix = str(n) + {8:'b',16:'h',32:'s',64:'d'}[bits]
// neonSuffix above encodes the NEON arrangement matched in the expected `dup`:
// lane count followed by the element-size letter (b/h/s/d), e.g. "8h".
func repeating${n}_uint${bits}(_ scalar: UInt${bits}) -> SIMD${n}<UInt${bits}> {
SIMD${n}(repeating: scalar)
}
// CHECK: repeating${n}_uint${bits}{{[[:alnum:]]+}}:
// CHECKO-arm64-NEXT: dup.${neonSuffix} v0, {{[wx]}}0
// CHECKO-arm64-NEXT: ret
// CHECKOnone-arm64: dup.${neonSuffix}
// CHECKOnone-arm64: ret
% end
% end
%end
// Concatenating two 64-bit SIMD8<UInt8> halves into a 128-bit SIMD16 should
// lower, at -O, to a single lane-insert on arm64 (mov of b's low 64 bits into
// the high doubleword of the result) or a single punpcklqdq on x86_64.
func concat8x8(_ a: SIMD8<UInt8>, _ b: SIMD8<UInt8>) -> SIMD16<UInt8> {
SIMD16(lowHalf: a, highHalf: b)
}
// CHECK: s24SIMDUnsignedInitializers9concat8x8ys6SIMD16Vys5UInt8VGs5SIMD8VyAFG_AJtF:
// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
// CHECKO-arm64-NEXT: ret
// CHECKO-x86_64: punpcklqdq
// Concatenating two 128-bit SIMD16<UInt8> halves into a 256-bit SIMD32: on
// arm64 the test expects the optimized function body to be nothing but `ret`
// — presumably the two argument registers already coincide with the two
// result registers (NOTE(review): confirm against the arm64 SIMD calling
// convention).
func concat16x8(_ a: SIMD16<UInt8>, _ b: SIMD16<UInt8>) -> SIMD32<UInt8> {
SIMD32(lowHalf: a, highHalf: b)
}
// CHECK: s24SIMDUnsignedInitializers10concat16x8ys6SIMD32Vys5UInt8VGs6SIMD16VyAFG_AJtF:
// CHECKO-arm64-NEXT: ret
// Concatenating two 64-bit SIMD4<UInt16> halves into a 128-bit SIMD8 should
// lower, at -O, to a single lane-insert on arm64 or punpcklqdq on x86_64 —
// same shape as the UInt8 8x8 case above, just a wider element type.
func concat4x16(_ a: SIMD4<UInt16>, _ b: SIMD4<UInt16>) -> SIMD8<UInt16> {
SIMD8(lowHalf: a, highHalf: b)
}
// CHECK: s24SIMDUnsignedInitializers10concat4x16ys5SIMD8Vys6UInt16VGs5SIMD4VyAFG_AJtF:
// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
// CHECKO-arm64-NEXT: ret
// CHECKO-x86_64: punpcklqdq
// Concatenating two full 128-bit SIMD8<UInt16> halves into a 256-bit SIMD16:
// the optimized arm64 body is expected to be just `ret` (no data movement),
// as in the 16x8 UInt8 case above.
func concat8x16(_ a: SIMD8<UInt16>, _ b: SIMD8<UInt16>) -> SIMD16<UInt16> {
SIMD16(lowHalf: a, highHalf: b)
}
// CHECK: s24SIMDUnsignedInitializers10concat8x16ys6SIMD16Vys6UInt16VGs5SIMD8VyAFG_AJtF:
// CHECKO-arm64-NEXT: ret
// Concatenating two 64-bit SIMD2<UInt32> halves into a 128-bit SIMD4 should
// lower, at -O, to a single lane-insert on arm64 or punpcklqdq on x86_64.
func concat2x32(_ a: SIMD2<UInt32>, _ b: SIMD2<UInt32>) -> SIMD4<UInt32> {
SIMD4(lowHalf: a, highHalf: b)
}
// CHECK: s24SIMDUnsignedInitializers10concat2x32ys5SIMD4Vys6UInt32VGs5SIMD2VyAFG_AJtF:
// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
// CHECKO-arm64-NEXT: ret
// CHECKO-x86_64: punpcklqdq
// Concatenating two full 128-bit SIMD4<UInt32> halves into a 256-bit SIMD8:
// the optimized arm64 body is expected to be just `ret`.
func concat4x32(_ a: SIMD4<UInt32>, _ b: SIMD4<UInt32>) -> SIMD8<UInt32> {
SIMD8(lowHalf: a, highHalf: b)
}
// CHECK: s24SIMDUnsignedInitializers10concat4x32ys5SIMD8Vys6UInt32VGs5SIMD4VyAFG_AJtF:
// CHECKO-arm64-NEXT: ret
// Concatenating two full 128-bit SIMD2<UInt64> halves into a 256-bit SIMD4:
// the optimized arm64 body is expected to be just `ret`.
func concat2x64(_ a: SIMD2<UInt64>, _ b: SIMD2<UInt64>) -> SIMD4<UInt64> {
SIMD4(lowHalf: a, highHalf: b)
}
// CHECK: s24SIMDUnsignedInitializers10concat2x64ys5SIMD4Vys6UInt64VGs5SIMD2VyAFG_AJtF:
// CHECKO-arm64-NEXT: ret