mirror of
https://github.com/apple/swift.git
synced 2025-12-14 20:36:38 +01:00
WIP: add more overloads to optimize SIMD codegen on concrete types. This change covers `init(repeating:)` and `init(lowHalf:highHalf:)`. These are always inlined, even in debug builds, since LLVM knows how to lower them to one or two instructions on the targets that we care about.
86 lines
3.2 KiB
Swift
86 lines
3.2 KiB
Swift
//===--- SIMDFloatInitializers.swift.gyb -------------------*- swift -*-===//
|
|
//
|
|
// This source file is part of the Swift.org open source project
|
|
//
|
|
// Copyright (c) 2025 Apple Inc. and the Swift project authors
|
|
// Licensed under Apache License v2.0 with Runtime Library Exception
|
|
//
|
|
// See https://swift.org/LICENSE.txt for license information
|
|
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
// RUN: %empty-directory(%t)
|
|
// RUN: %gyb %s -o %t/SIMDFloatInitializers.swift
|
|
// RUN: %target-swift-frontend -primary-file %t/SIMDFloatInitializers.swift -S | %FileCheck %t/SIMDFloatInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKOnone-%target-cpu
|
|
// RUN: %target-swift-frontend -primary-file %t/SIMDFloatInitializers.swift -S -O | %FileCheck %t/SIMDFloatInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKO-%target-cpu
|
|
|
|
// Disable this test for now because aEIC (@_alwaysEmitIntoClient) and transparent functions are still not
|
|
// correctly differentiable, and so these inits are suppressed in the stdlib.
|
|
// REQUIRES: differentiable-aEIC-transparent
|
|
|
|
import Swift
|
|
|
|
// Template loop: for every floating-point element width (16, 32 and 64 bits,
// i.e. Float16, Float and Double) and every total vector width (64 and 128
// bits), emit one wrapper function around init(repeating:) together with the
// FileCheck directives for its arm64 assembly. The lane count is the total
// width divided by the element width; the single-lane combination is skipped.
// The expected NEON arrangement suffix is the lane count followed by the
// element-size letter (h, s or d here; the table's 8-bit entry is never
// selected by this loop). Float16 wrappers are additionally guarded by an
// arm64 conditional-compilation block and an availability attribute, and
// their label is checked under the arm64-specific prefix only.
%for bits in [16,32,64]:
|
|
% scalar = {16:'Float16',32:'Float',64:'Double'}[bits]
|
|
% for totalBits in [64,128]:
|
|
% n = totalBits // bits
|
|
% if n != 1:
|
|
% neonSuffix = str(n) + {8:'b',16:'h',32:'s',64:'d'}[bits]
|
|
% if bits == 16:
|
|
#if arch(arm64)
|
|
@available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
|
|
% end
|
|
// Wrapper under test: builds the vector by passing `scalar` to
// init(repeating:). The directives below expect this to become a single
// `dup` followed by `ret` when optimized on arm64.
func repeating${n}x${bits}(_ scalar: ${scalar}) -> SIMD${n}<${scalar}> {
|
|
SIMD${n}(repeating: scalar)
|
|
}
|
|
% if bits == 16:
|
|
#endif
|
|
// CHECK-arm64: repeating${n}x${bits}{{[[:alnum:]]+}}:
|
|
% else:
|
|
// CHECK: repeating${n}x${bits}{{[[:alnum:]]+}}:
|
|
% end
|
|
// CHECKO-arm64-NEXT: dup.${neonSuffix} v0, v0[0]
|
|
// CHECKO-arm64-NEXT: ret
|
|
// CHECKOnone-arm64: dup.${neonSuffix}
|
|
// CHECKOnone-arm64: ret
|
|
|
|
% end
|
|
% end
|
|
%end
|
|
|
|
#if arch(arm64)
|
|
// Joins two four-lane Float16 vectors into one eight-lane vector: `a` becomes
// the low half of the result and `b` the high half.
@available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
func concat4x16(_ a: SIMD4<Float16>, _ b: SIMD4<Float16>) -> SIMD8<Float16> {
  return SIMD8<Float16>(lowHalf: a, highHalf: b)
}
|
|
// CHECK-arm64: s21SIMDFloatInitializers10concat4x16ys5SIMD8Vys7Float16VGs5SIMD4VyAFG_AJtF:
|
|
// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
|
|
// CHECKO-arm64-NEXT: ret
|
|
|
|
// Joins two eight-lane Float16 vectors into one sixteen-lane vector: `a`
// becomes the low half of the result and `b` the high half.
@available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
func concat8x16(_ a: SIMD8<Float16>, _ b: SIMD8<Float16>) -> SIMD16<Float16> {
  return SIMD16<Float16>(lowHalf: a, highHalf: b)
}
|
|
// CHECK-arm64: s21SIMDFloatInitializers10concat8x16ys6SIMD16Vys7Float16VGs5SIMD8VyAFG_AJtF:
|
|
// CHECKO-arm64-NEXT: ret
|
|
#endif
|
|
|
|
// Joins two two-lane Float vectors into one four-lane vector: `a` becomes the
// low half of the result and `b` the high half.
func concat2x32(_ a: SIMD2<Float>, _ b: SIMD2<Float>) -> SIMD4<Float> {
  return SIMD4<Float>(lowHalf: a, highHalf: b)
}
|
|
// CHECK: s21SIMDFloatInitializers10concat2x32ys5SIMD4VySfGs5SIMD2VySfG_AHtF:
|
|
// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
|
|
// CHECKO-arm64-NEXT: ret
|
|
|
|
// Joins two four-lane Float vectors into one eight-lane vector: `a` becomes
// the low half of the result and `b` the high half.
func concat4x32(_ a: SIMD4<Float>, _ b: SIMD4<Float>) -> SIMD8<Float> {
  return SIMD8<Float>(lowHalf: a, highHalf: b)
}
|
|
// CHECK: s21SIMDFloatInitializers10concat4x32ys5SIMD8VySfGs5SIMD4VySfG_AHtF:
|
|
// CHECKO-arm64-NEXT: ret
|
|
|
|
// Joins two two-lane Double vectors into one four-lane vector: `a` becomes the
// low half of the result and `b` the high half.
func concat2x64(_ a: SIMD2<Double>, _ b: SIMD2<Double>) -> SIMD4<Double> {
  return SIMD4<Double>(lowHalf: a, highHalf: b)
}
|
|
// CHECK: s21SIMDFloatInitializers10concat2x64ys5SIMD4VySdGs5SIMD2VySdG_AHtF:
|
|
// CHECKO-arm64-NEXT: ret
|