Files
swift-mirror/test/stdlib/SIMDFloatInitializers.swift.gyb
Stephen Canon 592d72bba9 Concrete SIMD.init(repeating:) and SIMD.init(lowHalf:highHalf:) optimizations (#81766)
WIP to add more overloads to optimize SIMD codegen on concrete types.
Here we do:

- init(repeating:)
- init(lowHalf:highHalf:)

These are always inlined, even in debug, since LLVM knows how to lower
them to one or two instructions on the targets that we care about.
2025-05-27 15:15:13 -04:00

86 lines
3.2 KiB
Swift

//===--- SIMDFloatInitializers.swift.gyb -------------------*- swift -*-===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2025 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
// RUN: %empty-directory(%t)
// RUN: %gyb %s -o %t/SIMDFloatInitializers.swift
// RUN: %target-swift-frontend -primary-file %t/SIMDFloatInitializers.swift -S | %FileCheck %t/SIMDFloatInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKOnone-%target-cpu
// RUN: %target-swift-frontend -primary-file %t/SIMDFloatInitializers.swift -S -O | %FileCheck %t/SIMDFloatInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKO-%target-cpu
// Disable this test for now because aEIC/transparent functions still are not
// correctly differentiable, and so these inits are suppressed in the stdlib.
// REQUIRES: differentiable-aEIC-transparent
import Swift
%for bits in [16,32,64]:
% scalar = {16:'Float16',32:'Float',64:'Double'}[bits]
% for totalBits in [64,128]:
% n = totalBits // bits
% if n != 1:
% neonSuffix = str(n) + {8:'b',16:'h',32:'s',64:'d'}[bits]
% if bits == 16:
#if arch(arm64)
@available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
% end
func repeating${n}x${bits}(_ scalar: ${scalar}) -> SIMD${n}<${scalar}> {
SIMD${n}(repeating: scalar)
}
% if bits == 16:
#endif
// CHECK-arm64: repeating${n}x${bits}{{[[:alnum:]]+}}:
% else:
// CHECK: repeating${n}x${bits}{{[[:alnum:]]+}}:
% end
// CHECKO-arm64-NEXT: dup.${neonSuffix} v0, v0[0]
// CHECKO-arm64-NEXT: ret
// CHECKOnone-arm64: dup.${neonSuffix}
// CHECKOnone-arm64: ret
% end
% end
%end
#if arch(arm64)
@available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
func concat4x16(_ a: SIMD4<Float16>, _ b: SIMD4<Float16>) -> SIMD8<Float16> {
SIMD8(lowHalf: a, highHalf: b)
}
// CHECK-arm64: s21SIMDFloatInitializers10concat4x16ys5SIMD8Vys7Float16VGs5SIMD4VyAFG_AJtF:
// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
// CHECKO-arm64-NEXT: ret
@available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
func concat8x16(_ a: SIMD8<Float16>, _ b: SIMD8<Float16>) -> SIMD16<Float16> {
SIMD16(lowHalf: a, highHalf: b)
}
// CHECK-arm64: s21SIMDFloatInitializers10concat8x16ys6SIMD16Vys7Float16VGs5SIMD8VyAFG_AJtF:
// CHECKO-arm64-NEXT: ret
#endif
func concat2x32(_ a: SIMD2<Float>, _ b: SIMD2<Float>) -> SIMD4<Float> {
SIMD4(lowHalf: a, highHalf: b)
}
// CHECK: s21SIMDFloatInitializers10concat2x32ys5SIMD4VySfGs5SIMD2VySfG_AHtF:
// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
// CHECKO-arm64-NEXT: ret
func concat4x32(_ a: SIMD4<Float>, _ b: SIMD4<Float>) -> SIMD8<Float> {
SIMD8(lowHalf: a, highHalf: b)
}
// CHECK: s21SIMDFloatInitializers10concat4x32ys5SIMD8VySfGs5SIMD4VySfG_AHtF:
// CHECKO-arm64-NEXT: ret
func concat2x64(_ a: SIMD2<Double>, _ b: SIMD2<Double>) -> SIMD4<Double> {
SIMD4(lowHalf: a, highHalf: b)
}
// CHECK: s21SIMDFloatInitializers10concat2x64ys5SIMD4VySdGs5SIMD2VySdG_AHtF:
// CHECKO-arm64-NEXT: ret