Concrete SIMD.init(repeating:) and SIMD.init(lowHalf:highHalf:) optimizations (#81766)

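Work in progress: adds more overloads to optimize SIMD codegen on concrete types. This change covers two initializers: `init(repeating:)` and `init(lowHalf:highHalf:)`. They are always inlined, even in debug builds, because LLVM knows how to lower them to one or two instructions on the targets that we care about. (The floating-point `init(repeating:)` overload is present but commented out for now because it breaks differentiation testing.)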
2025-12-14 20:36:38 +01:00 · 2025-05-27 15:15:13 -04:00
parent 746414bcd8
commit 592d72bba9
12 changed files with 865 additions and 378 deletions
--- a/Runtimes/Core/core/CMakeLists.txt
+++ b/Runtimes/Core/core/CMakeLists.txt
@@ -264,8 +264,14 @@ if(SwiftCore_ENABLE_COMMANDLINE_SUPPORT)
 endif()

 if(SwiftCore_ENABLE_VECTOR_TYPES)
-  gyb_expand(SIMDConcreteOperations.swift.gyb
-    SIMDConcreteOperations.swift
+  gyb_expand(SIMDFloatConcreteOperations.swift.gyb
+    SIMDFloatConcreteOperations.swift
+    FLAGS "-DCMAKE_SIZEOF_VOID_P=${SwiftCore_SIZEOF_POINTER}")
+  gyb_expand(SIMDIntegerConcreteOperations.swift.gyb
+    SIMDIntegerConcreteOperations.swift
+    FLAGS "-DCMAKE_SIZEOF_VOID_P=${SwiftCore_SIZEOF_POINTER}")
+  gyb_expand(SIMDMaskConcreteOperations.swift.gyb
+    SIMDMaskConcreteOperations.swift
    FLAGS "-DCMAKE_SIZEOF_VOID_P=${SwiftCore_SIZEOF_POINTER}")
  gyb_expand(SIMDVectorTypes.swift.gyb
    SIMDVectorTypes.swift
@@ -273,7 +279,9 @@ if(SwiftCore_ENABLE_VECTOR_TYPES)

  target_sources(swiftCore PRIVATE
    SIMDVector.swift
-    "${CMAKE_CURRENT_BINARY_DIR}/SIMDConcreteOperations.swift"
+    "${CMAKE_CURRENT_BINARY_DIR}/SIMDFloatConcreteOperations.swift"
+    "${CMAKE_CURRENT_BINARY_DIR}/SIMDIntegerConcreteOperations.swift"
+    "${CMAKE_CURRENT_BINARY_DIR}/SIMDMaskConcreteOperations.swift"
    "${CMAKE_CURRENT_BINARY_DIR}/SIMDVectorTypes.swift")
 endif()

--- a/stdlib/public/core/CMakeLists.txt
+++ b/stdlib/public/core/CMakeLists.txt
@@ -268,11 +268,21 @@ split_embedded_sources(
  )

 if(SWIFT_STDLIB_ENABLE_VECTOR_TYPES)
+  split_embedded_sources(
+    OUT_LIST_EMBEDDED SWIFTLIB_EMBEDDED_VECTOR_GYB_SOURCES
+    OUT_LIST_NORMAL SWIFTLIB_VECTOR_GYB_SOURCES
+    
+    EMBEDDED SIMDIntegerConcreteOperations.swift.gyb
+    EMBEDDED SIMDFloatConcreteOperations.swift.gyb
+    EMBEDDED SIMDMaskConcreteOperations.swift.gyb
+    EMBEDDED SIMDVectorTypes.swift.gyb
+  )
+
  list(APPEND SWIFTLIB_SOURCES SIMDVector.swift)
-  list(APPEND SWIFTLIB_GYB_SOURCES SIMDConcreteOperations.swift.gyb SIMDVectorTypes.swift.gyb)
+  list(APPEND SWIFTLIB_GYB_SOURCES ${SWIFTLIB_VECTOR_GYB_SOURCES})

  list(APPEND SWIFTLIB_EMBEDDED_SOURCES SIMDVector.swift)
-  list(APPEND SWIFTLIB_EMBEDDED_GYB_SOURCES SIMDConcreteOperations.swift.gyb SIMDVectorTypes.swift.gyb)
+  list(APPEND SWIFTLIB_EMBEDDED_GYB_SOURCES ${SWIFTLIB_EMBEDDED_VECTOR_GYB_SOURCES})
 endif()

 # Freestanding and Linux/Android builds both have failures to resolve.
--- a/stdlib/public/core/GroupInfo.json
+++ b/stdlib/public/core/GroupInfo.json
@@ -182,9 +182,12 @@
      "FloatingPointTypes.swift",
      "FloatingPointRandom.swift"],
    "Vector": [
-      "SIMDConcreteOperations.swift",
+      "SIMDIntegerConcreteOperations.swift",
+      "SIMDFloatConcreteOperations.swift",
+      "SIMDMaskConcreteOperations.swift",
      "SIMDVector.swift",
-      "SIMDVectorTypes.swift"]}
+      "SIMDVectorTypes.swift"
+    ]}
  ],
  "Optional": [
    "Optional.swift"
--- a/stdlib/public/core/SIMDConcreteOperations.swift.gyb
+++ b/stdlib/public/core/SIMDConcreteOperations.swift.gyb
@@ -1,371 +0,0 @@
-//===--- SIMDConcreteOperations.swift -------------------------*- swift -*-===//
-//
-// This source file is part of the Swift.org open source project
-//
-// Copyright (c) 2021 Apple Inc. and the Swift project authors
-// Licensed under Apache License v2.0 with Runtime Library Exception
-//
-// See https://swift.org/LICENSE.txt for license information
-// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
-//
-//===----------------------------------------------------------------------===//
-
-%{
-from SwiftIntTypes import all_integer_types
-word_bits = int(CMAKE_SIZEOF_VOID_P) * 8
-storagescalarCounts = [2,4,8,16,32,64]
-vectorscalarCounts = storagescalarCounts + [3]
-}%
-
-%for int in all_integer_types(word_bits):
-% Scalar = int.stdlib_name
-% for n in vectorscalarCounts:
-%  Vector = "SIMD" + str(n) + "<" + Scalar + ">"
-%  storageN = 4 if n == 3 else n
-%  s = "s" if int.is_signed else "u"
-%  Builtin = "Vec" + str(storageN) + "xInt" + str(int.bits)
-%  MaskExt = "Builtin.sext_Vec" + str(storageN) + "xInt1_" + Builtin
-%  if int.is_signed:
-extension SIMDMask where Storage == ${Vector} {
-  @_alwaysEmitIntoClient
-  internal init(_ _builtin: Builtin.${Builtin}) {
-    _storage = ${Vector}(_builtin)
-  }
-  
-  @_alwaysEmitIntoClient
-  internal static var allTrue: Self {
-    let zero = ${Vector}()
-    return zero .== zero
-  }
-  
-  /// A vector mask that is the pointwise logical negation of the input.
-  ///
-  /// Equivalent to:
-  /// ```
-  /// var result = SIMDMask<${Vector}>()
-  /// for i in result.indices {
-  ///   result[i] = !a[i]
-  /// }
-  /// ```
-  @_alwaysEmitIntoClient
-  public static prefix func .!(a: Self) -> Self {
-    a .^ .allTrue
-  }
-    
-  /// A vector mask that is the pointwise logical conjunction of the inputs.
-  ///
-  /// Equivalent to:
-  /// ```
-  /// var result = SIMDMask<${Vector}>()
-  /// for i in result.indices {
-  ///   result[i] = a[i] && b[i]
-  /// }
-  /// ```
-  ///
-  /// Note that unlike the scalar `&&` operator, the SIMD `.&` operator
-  /// always fully evaluates both arguments.
-  @_alwaysEmitIntoClient
-  public static func .&(a: Self, b: Self) -> Self {
-    Self(${Vector}(Builtin.and_${Builtin}(
-      a._storage._storage._value,
-      b._storage._storage._value
-    )))
-  }
-    
-  /// Replaces `a` with the pointwise logical conjunction of `a` and `b`.
-  ///
-  /// Equivalent to:
-  /// ```
-  /// for i in a.indices {
-  ///   a[i] = a[i] && b[i]
-  /// }
-  /// ```
-  @_alwaysEmitIntoClient
-  public static func .&=(a: inout Self, b: Self) {
-    a = a .& b
-  }
-      
-  /// A vector mask that is the pointwise exclusive or of the inputs.
-  ///
-  /// Equivalent to:
-  /// ```
-  /// var result = SIMDMask<${Vector}>()
-  /// for i in result.indices {
-  ///   result[i] = a[i] != b[i]
-  /// }
-  /// ```
-  @_alwaysEmitIntoClient
-  public static func .^(a: Self, b: Self) -> Self {
-    Self(${Vector}(Builtin.xor_${Builtin}(
-      a._storage._storage._value,
-      b._storage._storage._value
-    )))
-  }
-    
-  /// Replaces `a` with the pointwise exclusive or of `a` and `b`.
-  ///
-  /// Equivalent to:
-  /// ```
-  /// for i in a.indices {
-  ///   a[i] = a[i] != b[i]
-  /// }
-  /// ```
-  @_alwaysEmitIntoClient
-  public static func .^=(a: inout Self, b: Self) {
-    a = a .^ b
-  }
-      
-  /// A vector mask that is the pointwise logical disjunction of the inputs.
-  ///
-  /// Equivalent to:
-  /// ```
-  /// var result = SIMDMask<${Vector}>()
-  /// for i in result.indices {
-  ///   result[i] = a[i] || b[i]
-  /// }
-  /// ```
-  ///
-  /// Note that unlike the scalar `||` operator, the SIMD `.|` operator
-  /// always fully evaluates both arguments.
-  @_alwaysEmitIntoClient
-  public static func .|(a: Self, b: Self) -> Self {
-    Self(${Vector}(Builtin.or_${Builtin}(
-      a._storage._storage._value,
-      b._storage._storage._value
-    )))
-  }
-    
-  /// Replaces `a` with the pointwise logical disjunction of `a` and `b`.
-  ///
-  /// Equivalent to:
-  /// ```
-  /// for i in a.indices {
-  ///   a[i] = a[i] || b[i]
-  /// }
-  /// ```
-  @_alwaysEmitIntoClient
-  public static func .|=(a: inout Self, b: Self) {
-    a = a .| b
-  }
-    
-  /// A vector mask with the result of a pointwise equality comparison.
-  ///
-  /// Equivalent to:
-  /// ```
-  /// var result = SIMDMask<${Vector}>()
-  /// for i in result.indices {
-  ///   result[i] = a[i] == b[i]
-  /// }
-  /// ```
-  @_alwaysEmitIntoClient
-  public static func .==(a: Self, b: Self) -> Self {
-    .!(a .^ b)
-  }
-  
-  /// A vector mask with the result of a pointwise inequality comparison.
-  ///
-  /// Equivalent to:
-  /// ```
-  /// var result = SIMDMask<${Vector}>()
-  /// for i in result.indices {
-  ///   result[i] = a[i] != b[i]
-  /// }
-  /// ```
-  @_alwaysEmitIntoClient
-  public static func .!=(a: Self, b: Self) -> Self {
-    a .^ b
-  }
-    
-  /// Replaces elements of this vector with elements of `other` in the lanes
-  /// where `mask` is `true`.
-  ///
-  /// Equivalent to:
-  /// ```
-  /// for i in indices {
-  ///   if mask[i] { self[i] = other[i] }
-  /// }
-  /// ```
-  @_alwaysEmitIntoClient
-  public mutating func replace(with other: Self, where mask: Self) {
-    self = replacing(with: other, where: mask)
-  }
-    
-  /// Returns a copy of this vector, with elements replaced by elements of
-  /// `other` in the lanes where `mask` is `true`.
-  ///
-  /// Equivalent to:
-  /// ```
-  /// var result = Self()
-  /// for i in indices {
-  ///   result[i] = mask[i] ? other[i] : self[i]
-  /// }
-  /// ```
-  @_alwaysEmitIntoClient
-  public func replacing(with other: Self, where mask: Self) -> Self {
-    (self .& .!mask) .| (other .& mask)
-  }
-}
-
-%  end
-extension SIMD${n} where Scalar == ${Scalar} {
-  @_alwaysEmitIntoClient
-  internal init(_ _builtin: Builtin.${Builtin}) {
-    _storage = ${Scalar}.SIMD${storageN}Storage(_builtin)
-  }
-    
-  /// A vector mask with the result of a pointwise equality comparison.
-  @_alwaysEmitIntoClient
-  public static func .==(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask<MaskStorage>(${MaskExt}(
-      Builtin.cmp_eq_${Builtin}(a._storage._value, b._storage._value)
-    ))
-  }
-  
-  /// A vector mask with the result of a pointwise inequality comparison.
-  @_alwaysEmitIntoClient
-  public static func .!=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask<MaskStorage>(${MaskExt}(
-      Builtin.cmp_ne_${Builtin}(a._storage._value, b._storage._value)
-    ))
-  }
-  
-  /// A vector mask with the result of a pointwise less-than comparison.
-  @_alwaysEmitIntoClient
-  public static func .<(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask<MaskStorage>(${MaskExt}(
-      Builtin.cmp_${s}lt_${Builtin}(a._storage._value, b._storage._value)
-    ))
-  }
-  
-  /// A vector mask with the result of a pointwise less-than-or-equal-to comparison.
-  @_alwaysEmitIntoClient
-  public static func .<=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask<MaskStorage>(${MaskExt}(
-      Builtin.cmp_${s}le_${Builtin}(a._storage._value, b._storage._value)
-    ))
-  }
-  
-  /// A vector mask with the result of a pointwise greater-than comparison.
-  @_alwaysEmitIntoClient
-  public static func .>(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask<MaskStorage>(${MaskExt}(
-      Builtin.cmp_${s}gt_${Builtin}(a._storage._value, b._storage._value)
-    ))
-  }
-  
-  /// A vector mask with the result of a pointwise greater-than-or-equal-to comparison.
-  @_alwaysEmitIntoClient
-  public static func .>=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask<MaskStorage>(${MaskExt}(
-      Builtin.cmp_${s}ge_${Builtin}(a._storage._value, b._storage._value)
-    ))
-  }
-    
-  /// The wrapping sum of two vectors.
-  @_alwaysEmitIntoClient
-  public static func &+(a: Self, b: Self) -> Self {
-    Self(Builtin.add_${Builtin}(a._storage._value, b._storage._value))
-  }
-    
-  /// The wrapping difference of two vectors.
-  @_alwaysEmitIntoClient
-  public static func &-(a: Self, b: Self) -> Self {
-    Self(Builtin.sub_${Builtin}(a._storage._value, b._storage._value))
-  }
-    
-  /// The pointwise wrapping product of two vectors.
-  @_alwaysEmitIntoClient
-  public static func &*(a: Self, b: Self) -> Self {
-    Self(Builtin.mul_${Builtin}(a._storage._value, b._storage._value))
-  }
-        
-  /// Updates the left hand side with the wrapping sum of the two
-  /// vectors.
-  @_alwaysEmitIntoClient
-  public static func &+=(a: inout Self, b: Self) { a = a &+ b }
-    
-  /// Updates the left hand side with the wrapping difference of the two
-  /// vectors.
-  @_alwaysEmitIntoClient
-  public static func &-=(a: inout Self, b: Self) { a = a &- b }
-    
-  /// Updates the left hand side with the pointwise wrapping product of two
-  /// vectors.
-  @_alwaysEmitIntoClient
-  public static func &*=(a: inout Self, b: Self) { a = a &* b }
-}
-
-% end
-%end
-
-%for (Scalar, bits) in [('Float16',16), ('Float',32), ('Double',64)]:
-% for n in vectorscalarCounts:
-%  Vector = "SIMD" + str(n) + "<" + Scalar + ">"
-%  storageN = 4 if n == 3 else n
-%  Builtin = "Vec" + str(storageN) + "xFPIEEE" + str(bits)
-%  VecPre = "Vec" + str(storageN) + "x"
-%  MaskExt = "Builtin.sext_" + VecPre + "Int1_" + VecPre + "Int" + str(bits)
-%  if bits == 16:
-#if !((os(macOS) || targetEnvironment(macCatalyst)) && arch(x86_64))
-@available(SwiftStdlib 5.3, *)
-%  end
-extension SIMD${n} where Scalar == ${Scalar} {
-  @_alwaysEmitIntoClient
-  internal init(_ _builtin: Builtin.${Builtin}) {
-    _storage = ${Scalar}.SIMD${storageN}Storage(_builtin)
-  }
-  
-  /// A vector mask with the result of a pointwise equality comparison.
-  @_alwaysEmitIntoClient
-  public static func .==(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask<MaskStorage>(${MaskExt}(
-      Builtin.fcmp_oeq_${Builtin}(a._storage._value, b._storage._value)
-    ))
-  }
-  
-  /// A vector mask with the result of a pointwise inequality comparison.
-  @_alwaysEmitIntoClient
-  public static func .!=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask<MaskStorage>(${MaskExt}(
-      Builtin.fcmp_une_${Builtin}(a._storage._value, b._storage._value)
-    ))
-  }
-  
-  /// A vector mask with the result of a pointwise less-than comparison.
-  @_alwaysEmitIntoClient
-  public static func .<(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask<MaskStorage>(${MaskExt}(
-      Builtin.fcmp_olt_${Builtin}(a._storage._value, b._storage._value)
-    ))
-  }
-  
-  /// A vector mask with the result of a pointwise less-than-or-equal-to comparison.
-  @_alwaysEmitIntoClient
-  public static func .<=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask<MaskStorage>(${MaskExt}(
-      Builtin.fcmp_ole_${Builtin}(a._storage._value, b._storage._value)
-    ))
-  }
-  
-  /// A vector mask with the result of a pointwise greater-than comparison.
-  @_alwaysEmitIntoClient
-  public static func .>(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask<MaskStorage>(${MaskExt}(
-      Builtin.fcmp_ogt_${Builtin}(a._storage._value, b._storage._value)
-    ))
-  }
-  
-  /// A vector mask with the result of a pointwise greater-than-or-equal-to comparison.
-  @_alwaysEmitIntoClient
-  public static func .>=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
-    SIMDMask<MaskStorage>(${MaskExt}(
-      Builtin.fcmp_oge_${Builtin}(a._storage._value, b._storage._value)
-    ))
-  }
-}
-%  if bits == 16:
-#endif
-%  end
-
-% end
-%end
--- a/stdlib/public/core/SIMDFloatConcreteOperations.swift.gyb
+++ b/stdlib/public/core/SIMDFloatConcreteOperations.swift.gyb
@@ -0,0 +1,120 @@
+//===--- SIMDFloatConcreteOperations.swift --------------------*- swift -*-===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2021-2025 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+
+%{
+from SwiftIntTypes import all_integer_types
+word_bits = int(CMAKE_SIZEOF_VOID_P) * 8
+storagescalarCounts = [2,4,8,16,32,64]
+vectorscalarCounts = storagescalarCounts + [3]
+}%
+
+%for (Scalar, bits) in [('Float16',16), ('Float',32), ('Double',64)]:
+% for n in vectorscalarCounts:
+%  Vector = "SIMD" + str(n) + "<" + Scalar + ">"
+%  storageN = 4 if n == 3 else n
+%  Builtin = "Vec" + str(storageN) + "xFPIEEE" + str(bits)
+%  VecPre = "Vec" + str(storageN) + "x"
+%  MaskExt = "Builtin.sext_" + VecPre + "Int1_" + VecPre + "Int" + str(bits)
+%  if bits == 16:
+#if !((os(macOS) || targetEnvironment(macCatalyst)) && arch(x86_64))
+@available(SwiftStdlib 5.3, *)
+%  end
+extension SIMD${n} where Scalar == ${Scalar} {
+  @_alwaysEmitIntoClient @_transparent
+  internal init(_ _builtin: Builtin.${Builtin}) {
+    _storage = ${Scalar}.SIMD${storageN}Storage(_builtin)
+  }
+  
+  /* This initializer breaks differentiation testing, so it is commented out
+     until we figure out what to do about that.
+  @_alwaysEmitIntoClient @_transparent
+  public init(repeating scalar: ${Scalar}) {
+    let asVector = Builtin.insertelement_${Builtin}_FPIEEE${bits}_Int32(
+      Builtin.zeroInitializer(), scalar._value, Builtin.zeroInitializer()
+    )
+    let repeated = Builtin.shufflevector_${Builtin}_Vec${storageN}xInt32(
+      asVector, Builtin.zeroInitializer(), Builtin.zeroInitializer()
+    )
+%if n != 3:
+    self.init(repeated)
+%else:
+    self.init(Builtin.insertelement_${Builtin}_FPIEEE${bits}_Int32(
+      repeated, Builtin.zeroInitializer(), Int32(3)._value
+    ))
+%end
+  }
+  */
+  
+%  if n >= 4:
+  @_alwaysEmitIntoClient @_transparent
+  public init(
+    lowHalf: SIMD${n//2}<${Scalar}>,
+    highHalf: SIMD${n//2}<${Scalar}>
+  ) {
+    self = unsafe unsafeBitCast((lowHalf, highHalf), to: Self.self)
+  }
+  
+%  end
+  /// A vector mask with the result of a pointwise equality comparison.
+  @_alwaysEmitIntoClient
+  public static func .==(a: Self, b: Self) -> SIMDMask<MaskStorage> {
+    SIMDMask<MaskStorage>(${MaskExt}(
+      Builtin.fcmp_oeq_${Builtin}(a._storage._value, b._storage._value)
+    ))
+  }
+  
+  /// A vector mask with the result of a pointwise inequality comparison.
+  @_alwaysEmitIntoClient
+  public static func .!=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
+    SIMDMask<MaskStorage>(${MaskExt}(
+      Builtin.fcmp_une_${Builtin}(a._storage._value, b._storage._value)
+    ))
+  }
+  
+  /// A vector mask with the result of a pointwise less-than comparison.
+  @_alwaysEmitIntoClient
+  public static func .<(a: Self, b: Self) -> SIMDMask<MaskStorage> {
+    SIMDMask<MaskStorage>(${MaskExt}(
+      Builtin.fcmp_olt_${Builtin}(a._storage._value, b._storage._value)
+    ))
+  }
+  
+  /// A vector mask with the result of a pointwise less-than-or-equal-to comparison.
+  @_alwaysEmitIntoClient
+  public static func .<=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
+    SIMDMask<MaskStorage>(${MaskExt}(
+      Builtin.fcmp_ole_${Builtin}(a._storage._value, b._storage._value)
+    ))
+  }
+  
+  /// A vector mask with the result of a pointwise greater-than comparison.
+  @_alwaysEmitIntoClient
+  public static func .>(a: Self, b: Self) -> SIMDMask<MaskStorage> {
+    SIMDMask<MaskStorage>(${MaskExt}(
+      Builtin.fcmp_ogt_${Builtin}(a._storage._value, b._storage._value)
+    ))
+  }
+  
+  /// A vector mask with the result of a pointwise greater-than-or-equal-to comparison.
+  @_alwaysEmitIntoClient
+  public static func .>=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
+    SIMDMask<MaskStorage>(${MaskExt}(
+      Builtin.fcmp_oge_${Builtin}(a._storage._value, b._storage._value)
+    ))
+  }
+}
+%  if bits == 16:
+#endif
+%  end
+
+% end
+%end
--- a/stdlib/public/core/SIMDIntegerConcreteOperations.swift.gyb
+++ b/stdlib/public/core/SIMDIntegerConcreteOperations.swift.gyb
@@ -0,0 +1,144 @@
+//===--- SIMDIntegerConcreteOperations.swift ------------------*- swift -*-===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2021-2025 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+
+%{
+from SwiftIntTypes import all_integer_types
+word_bits = int(CMAKE_SIZEOF_VOID_P) * 8
+storagescalarCounts = [2,4,8,16,32,64]
+vectorscalarCounts = storagescalarCounts + [3]
+}%
+
+%for int in all_integer_types(word_bits):
+% Scalar = int.stdlib_name
+% for n in vectorscalarCounts:
+%  Vector = "SIMD" + str(n) + "<" + Scalar + ">"
+%  storageN = 4 if n == 3 else n
+%  s = "s" if int.is_signed else "u"
+%  Builtin = "Vec" + str(storageN) + "xInt" + str(int.bits)
+%  MaskExt = "Builtin.sext_Vec" + str(storageN) + "xInt1_" + Builtin
+extension SIMD${n} where Scalar == ${Scalar} {
+  @_alwaysEmitIntoClient @_transparent
+  internal init(_ _builtin: Builtin.${Builtin}) {
+    _storage = ${Scalar}.SIMD${storageN}Storage(_builtin)
+  }
+  
+  @_alwaysEmitIntoClient @_transparent
+  public init(repeating scalar: ${Scalar}) {
+    let asVector = Builtin.insertelement_${Builtin}_Int${int.bits}_Int32(
+      Builtin.zeroInitializer(), scalar._value, Builtin.zeroInitializer()
+    )
+    let repeated = Builtin.shufflevector_${Builtin}_Vec${storageN}xInt32(
+      asVector, Builtin.zeroInitializer(), Builtin.zeroInitializer()
+    )
+%   if n != 3:
+    self.init(repeated)
+%   else:
+    self.init(Builtin.insertelement_${Builtin}_Int${int.bits}_Int32(
+      repeated, Builtin.zeroInitializer(), Int32(3)._value
+    ))
+%   end
+  }
+  
+%  if n >= 4:
+  @_alwaysEmitIntoClient @_transparent
+  public init(
+    lowHalf: SIMD${n//2}<${Scalar}>,
+    highHalf: SIMD${n//2}<${Scalar}>
+  ) {
+    self = unsafe unsafeBitCast((lowHalf, highHalf), to: Self.self)
+  }
+  
+%  end
+  /// A vector mask with the result of a pointwise equality comparison.
+  @_alwaysEmitIntoClient
+  public static func .==(a: Self, b: Self) -> SIMDMask<MaskStorage> {
+    SIMDMask<MaskStorage>(${MaskExt}(
+      Builtin.cmp_eq_${Builtin}(a._storage._value, b._storage._value)
+    ))
+  }
+  
+  /// A vector mask with the result of a pointwise inequality comparison.
+  @_alwaysEmitIntoClient
+  public static func .!=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
+    SIMDMask<MaskStorage>(${MaskExt}(
+      Builtin.cmp_ne_${Builtin}(a._storage._value, b._storage._value)
+    ))
+  }
+  
+  /// A vector mask with the result of a pointwise less-than comparison.
+  @_alwaysEmitIntoClient
+  public static func .<(a: Self, b: Self) -> SIMDMask<MaskStorage> {
+    SIMDMask<MaskStorage>(${MaskExt}(
+      Builtin.cmp_${s}lt_${Builtin}(a._storage._value, b._storage._value)
+    ))
+  }
+  
+  /// A vector mask with the result of a pointwise less-than-or-equal-to comparison.
+  @_alwaysEmitIntoClient
+  public static func .<=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
+    SIMDMask<MaskStorage>(${MaskExt}(
+      Builtin.cmp_${s}le_${Builtin}(a._storage._value, b._storage._value)
+    ))
+  }
+  
+  /// A vector mask with the result of a pointwise greater-than comparison.
+  @_alwaysEmitIntoClient
+  public static func .>(a: Self, b: Self) -> SIMDMask<MaskStorage> {
+    SIMDMask<MaskStorage>(${MaskExt}(
+      Builtin.cmp_${s}gt_${Builtin}(a._storage._value, b._storage._value)
+    ))
+  }
+  
+  /// A vector mask with the result of a pointwise greater-than-or-equal-to comparison.
+  @_alwaysEmitIntoClient
+  public static func .>=(a: Self, b: Self) -> SIMDMask<MaskStorage> {
+    SIMDMask<MaskStorage>(${MaskExt}(
+      Builtin.cmp_${s}ge_${Builtin}(a._storage._value, b._storage._value)
+    ))
+  }
+    
+  /// The wrapping sum of two vectors.
+  @_alwaysEmitIntoClient
+  public static func &+(a: Self, b: Self) -> Self {
+    Self(Builtin.add_${Builtin}(a._storage._value, b._storage._value))
+  }
+    
+  /// The wrapping difference of two vectors.
+  @_alwaysEmitIntoClient
+  public static func &-(a: Self, b: Self) -> Self {
+    Self(Builtin.sub_${Builtin}(a._storage._value, b._storage._value))
+  }
+    
+  /// The pointwise wrapping product of two vectors.
+  @_alwaysEmitIntoClient
+  public static func &*(a: Self, b: Self) -> Self {
+    Self(Builtin.mul_${Builtin}(a._storage._value, b._storage._value))
+  }
+        
+  /// Updates the left hand side with the wrapping sum of the two
+  /// vectors.
+  @_alwaysEmitIntoClient
+  public static func &+=(a: inout Self, b: Self) { a = a &+ b }
+    
+  /// Updates the left hand side with the wrapping difference of the two
+  /// vectors.
+  @_alwaysEmitIntoClient
+  public static func &-=(a: inout Self, b: Self) { a = a &- b }
+    
+  /// Updates the left hand side with the pointwise wrapping product of two
+  /// vectors.
+  @_alwaysEmitIntoClient
+  public static func &*=(a: inout Self, b: Self) { a = a &* b }
+}
+
+% end
+%end
--- a/stdlib/public/core/SIMDMaskConcreteOperations.swift.gyb
+++ b/stdlib/public/core/SIMDMaskConcreteOperations.swift.gyb
@@ -0,0 +1,227 @@
+//===--- SIMDMaskConcreteOperations.swift ---------------------*- swift -*-===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2021-2025 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+
+%{
+from SwiftIntTypes import all_signed_types
+word_bits = int(CMAKE_SIZEOF_VOID_P) * 8
+storagescalarCounts = [2,4,8,16,32,64]
+vectorscalarCounts = storagescalarCounts + [3]
+}%
+
+%for int in all_signed_types(word_bits):
+% Scalar = int.stdlib_name
+% for n in vectorscalarCounts:
+%  Vector = "SIMD" + str(n) + "<" + Scalar + ">"
+%  storageN = 4 if n == 3 else n
+%  s = "s"
+%  Builtin = "Vec" + str(storageN) + "xInt" + str(int.bits)
+%  MaskExt = "Builtin.sext_Vec" + str(storageN) + "xInt1_" + Builtin
+extension SIMDMask where Storage == ${Vector} {
+  @_alwaysEmitIntoClient @_transparent
+  internal init(_ _builtin: Builtin.${Builtin}) {
+    _storage = ${Vector}(_builtin)
+  }
+  
+  @_alwaysEmitIntoClient @_transparent
+  public init(repeating scalar: Bool) {
+    _storage = ${Vector}(repeating: scalar ? -1 : 0)
+  }
+  
+%  if n >= 4:
+  @_alwaysEmitIntoClient @_transparent
+  public init(
+    lowHalf: SIMDMask<SIMD${n//2}<${Scalar}>>,
+    highHalf: SIMDMask<SIMD${n//2}<${Scalar}>>
+  ) {
+    _storage = ${Vector}(
+      lowHalf: lowHalf._storage,
+      highHalf: highHalf._storage
+    )
+  }
+  
+%  end
+  @_alwaysEmitIntoClient
+  internal static var allTrue: Self {
+    let zero = ${Vector}()
+    return zero .== zero
+  }
+  
+  /// A vector mask that is the pointwise logical negation of the input.
+  ///
+  /// Equivalent to:
+  /// ```
+  /// var result = SIMDMask<${Vector}>()
+  /// for i in result.indices {
+  ///   result[i] = !a[i]
+  /// }
+  /// ```
+  @_alwaysEmitIntoClient
+  public static prefix func .!(a: Self) -> Self {
+    a .^ .allTrue
+  }
+    
+  /// A vector mask that is the pointwise logical conjunction of the inputs.
+  ///
+  /// Equivalent to:
+  /// ```
+  /// var result = SIMDMask<${Vector}>()
+  /// for i in result.indices {
+  ///   result[i] = a[i] && b[i]
+  /// }
+  /// ```
+  ///
+  /// Note that unlike the scalar `&&` operator, the SIMD `.&` operator
+  /// always fully evaluates both arguments.
+  @_alwaysEmitIntoClient
+  public static func .&(a: Self, b: Self) -> Self {
+    Self(${Vector}(Builtin.and_${Builtin}(
+      a._storage._storage._value,
+      b._storage._storage._value
+    )))
+  }
+    
+  /// Replaces `a` with the pointwise logical conjunction of `a` and `b`.
+  ///
+  /// Equivalent to:
+  /// ```
+  /// for i in a.indices {
+  ///   a[i] = a[i] && b[i]
+  /// }
+  /// ```
+  @_alwaysEmitIntoClient
+  public static func .&=(a: inout Self, b: Self) {
+    a = a .& b
+  }
+      
+  /// A vector mask that is the pointwise exclusive or of the inputs.
+  ///
+  /// Equivalent to:
+  /// ```
+  /// var result = SIMDMask<${Vector}>()
+  /// for i in result.indices {
+  ///   result[i] = a[i] != b[i]
+  /// }
+  /// ```
+  @_alwaysEmitIntoClient
+  public static func .^(a: Self, b: Self) -> Self {
+    Self(${Vector}(Builtin.xor_${Builtin}(
+      a._storage._storage._value,
+      b._storage._storage._value
+    )))
+  }
+    
+  /// Replaces `a` with the pointwise exclusive or of `a` and `b`.
+  ///
+  /// Equivalent to:
+  /// ```
+  /// for i in a.indices {
+  ///   a[i] = a[i] != b[i]
+  /// }
+  /// ```
+  @_alwaysEmitIntoClient
+  public static func .^=(a: inout Self, b: Self) {
+    a = a .^ b
+  }
+      
+  /// A vector mask that is the pointwise logical disjunction of the inputs.
+  ///
+  /// Equivalent to:
+  /// ```
+  /// var result = SIMDMask<${Vector}>()
+  /// for i in result.indices {
+  ///   result[i] = a[i] || b[i]
+  /// }
+  /// ```
+  ///
+  /// Note that unlike the scalar `||` operator, the SIMD `.|` operator
+  /// always fully evaluates both arguments.
+  @_alwaysEmitIntoClient
+  public static func .|(a: Self, b: Self) -> Self {
+    Self(${Vector}(Builtin.or_${Builtin}(
+      a._storage._storage._value,
+      b._storage._storage._value
+    )))
+  }
+    
+  /// Replaces `a` with the pointwise logical disjunction of `a` and `b`.
+  ///
+  /// Equivalent to:
+  /// ```
+  /// for i in a.indices {
+  ///   a[i] = a[i] || b[i]
+  /// }
+  /// ```
+  @_alwaysEmitIntoClient
+  public static func .|=(a: inout Self, b: Self) {
+    a = a .| b
+  }
+    
+  /// A vector mask with the result of a pointwise equality comparison.
+  ///
+  /// Equivalent to:
+  /// ```
+  /// var result = SIMDMask<${Vector}>()
+  /// for i in result.indices {
+  ///   result[i] = a[i] == b[i]
+  /// }
+  /// ```
+  @_alwaysEmitIntoClient
+  public static func .==(a: Self, b: Self) -> Self {
+    .!(a .^ b)
+  }
+  
+  /// A vector mask with the result of a pointwise inequality comparison.
+  ///
+  /// Equivalent to:
+  /// ```
+  /// var result = SIMDMask<${Vector}>()
+  /// for i in result.indices {
+  ///   result[i] = a[i] != b[i]
+  /// }
+  /// ```
+  @_alwaysEmitIntoClient
+  public static func .!=(a: Self, b: Self) -> Self {
+    a .^ b
+  }
+    
+  /// Replaces elements of this vector with elements of `other` in the lanes
+  /// where `mask` is `true`.
+  ///
+  /// Equivalent to:
+  /// ```
+  /// for i in indices {
+  ///   if mask[i] { self[i] = other[i] }
+  /// }
+  /// ```
+  @_alwaysEmitIntoClient
+  public mutating func replace(with other: Self, where mask: Self) {
+    self = replacing(with: other, where: mask)
+  }
+    
+  /// Returns a copy of this vector, with elements replaced by elements of
+  /// `other` in the lanes where `mask` is `true`.
+  ///
+  /// Equivalent to:
+  /// ```
+  /// var result = Self()
+  /// for i in indices {
+  ///   result[i] = mask[i] ? other[i] : self[i]
+  /// }
+  /// ```
+  @_alwaysEmitIntoClient
+  public func replacing(with other: Self, where mask: Self) -> Self {
+    (self .& .!mask) .| (other .& mask)
+  }
+}
+
+% end
+%end
--- a/test/stdlib/SIMDFloatInitializers.swift.gyb
+++ b/test/stdlib/SIMDFloatInitializers.swift.gyb
@@ -0,0 +1,85 @@
+//===--- SIMDFloatInitializers.swift.gyb ----------------------*- swift -*-===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2025 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+// RUN: %empty-directory(%t)
+// RUN: %gyb %s -o %t/SIMDFloatInitializers.swift
+// RUN: %target-swift-frontend -primary-file %t/SIMDFloatInitializers.swift -S | %FileCheck %t/SIMDFloatInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKOnone-%target-cpu
+// RUN: %target-swift-frontend -primary-file %t/SIMDFloatInitializers.swift -S -O | %FileCheck %t/SIMDFloatInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKO-%target-cpu
+
+// Disable this test for now because aEIC/transparent functions still are not
+// correctly differentiable, and so these inits are suppressed in the stdlib.
+// REQUIRES: differentiable-aEIC-transparent
+
+import Swift
+
+%for bits in [16,32,64]:
+% scalar = {16:'Float16',32:'Float',64:'Double'}[bits]
+% for totalBits in [64,128]:
+%  n = totalBits // bits
+%  if n != 1:
+%   neonSuffix = str(n) + {8:'b',16:'h',32:'s',64:'d'}[bits]
+%   if bits == 16:
+#if arch(arm64)
+@available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
+%   end
+func repeating${n}x${bits}(_ value: ${scalar}) -> SIMD${n}<${scalar}> {
+  SIMD${n}(repeating: value)
+}
+%   if bits == 16:
+#endif
+// CHECK-arm64: repeating${n}x${bits}{{[[:alnum:]]+}}:
+%   else:
+// CHECK: repeating${n}x${bits}{{[[:alnum:]]+}}:
+%   end
+// CHECKO-arm64-NEXT: dup.${neonSuffix} v0, v0[0]
+// CHECKO-arm64-NEXT: ret
+// CHECKOnone-arm64: dup.${neonSuffix}
+// CHECKOnone-arm64: ret
+
+%  end
+% end
+%end
+
+#if arch(arm64)
+@available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
+func concat4x16(_ low: SIMD4<Float16>, _ high: SIMD4<Float16>) -> SIMD8<Float16> {
+  SIMD8(lowHalf: low, highHalf: high)
+}
+// CHECK-arm64: s21SIMDFloatInitializers10concat4x16ys5SIMD8Vys7Float16VGs5SIMD4VyAFG_AJtF:
+// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
+// CHECKO-arm64-NEXT: ret
+
+@available(macOS 11.0, iOS 14.0, tvOS 14.0, watchOS 7.0, *)
+func concat8x16(_ low: SIMD8<Float16>, _ high: SIMD8<Float16>) -> SIMD16<Float16> {
+  SIMD16(lowHalf: low, highHalf: high)
+}
+// CHECK-arm64: s21SIMDFloatInitializers10concat8x16ys6SIMD16Vys7Float16VGs5SIMD8VyAFG_AJtF:
+// CHECKO-arm64-NEXT: ret
+#endif
+
+func concat2x32(_ low: SIMD2<Float>, _ high: SIMD2<Float>) -> SIMD4<Float> {
+  SIMD4(lowHalf: low, highHalf: high)
+}
+// CHECK: s21SIMDFloatInitializers10concat2x32ys5SIMD4VySfGs5SIMD2VySfG_AHtF:
+// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
+// CHECKO-arm64-NEXT: ret
+
+func concat4x32(_ low: SIMD4<Float>, _ high: SIMD4<Float>) -> SIMD8<Float> {
+  SIMD8(lowHalf: low, highHalf: high)
+}
+// CHECK: s21SIMDFloatInitializers10concat4x32ys5SIMD8VySfGs5SIMD4VySfG_AHtF:
+// CHECKO-arm64-NEXT: ret
+
+func concat2x64(_ low: SIMD2<Double>, _ high: SIMD2<Double>) -> SIMD4<Double> {
+  SIMD4(lowHalf: low, highHalf: high)
+}
+// CHECK: s21SIMDFloatInitializers10concat2x64ys5SIMD4VySdGs5SIMD2VySdG_AHtF:
+// CHECKO-arm64-NEXT: ret
--- a/test/stdlib/SIMDMaskInitializers.swift.gyb
+++ b/test/stdlib/SIMDMaskInitializers.swift.gyb
@@ -0,0 +1,84 @@
+//===--- SIMDMaskInitializers.swift.gyb -----------------------*- swift -*-===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2025 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+// RUN: %empty-directory(%t)
+// RUN: %gyb %s -o %t/SIMDMaskInitializers.swift
+// RUN: %target-swift-frontend -primary-file %t/SIMDMaskInitializers.swift -S | %FileCheck %t/SIMDMaskInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKOnone-%target-cpu
+// RUN: %target-swift-frontend -primary-file %t/SIMDMaskInitializers.swift -S -O | %FileCheck %t/SIMDMaskInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKO-%target-cpu
+
+import Swift
+
+%for bits in [8,16,32,64]:
+% for totalBits in [64,128]:
+%  n = totalBits // bits
+%  if n != 1:
+%   neonSuffix = str(n) + {8:'b',16:'h',32:'s',64:'d'}[bits]
+func repeating${n}_mask${bits}(_ value: Bool) -> SIMDMask<SIMD${n}<Int${bits}>> {
+  SIMDMask(repeating: value)
+}
+// CHECK: repeating${n}_mask${bits}{{[[:alnum:]]+}}:
+// CHECKO-arm64-NEXT: sbfx [[REG:[wx][0-9]]], {{[wx]}}0, #0, #1
+// CHECKO-arm64-NEXT: dup.${neonSuffix} v0, [[REG]]
+// CHECKO-arm64-NEXT: ret
+// CHECKOnone-arm64: dup.${neonSuffix}
+// CHECKOnone-arm64: ret
+
+%  end
+% end
+%end
+
+func concat8x8(_ low: SIMDMask<SIMD8<Int8>>, _ high: SIMDMask<SIMD8<Int8>>) -> SIMDMask<SIMD16<Int8>> {
+  SIMDMask(lowHalf: low, highHalf: high)
+}
+// CHECK: s20SIMDMaskInitializers9concat8x8ys0A0Vys6SIMD16Vys4Int8VGGADys5SIMD8VyAHGG_ANtF:
+// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
+// CHECKO-arm64-NEXT: ret
+// CHECKO-x86_64: punpcklqdq
+
+func concat16x8(_ low: SIMDMask<SIMD16<Int8>>, _ high: SIMDMask<SIMD16<Int8>>) -> SIMDMask<SIMD32<Int8>> {
+  SIMDMask(lowHalf: low, highHalf: high)
+}
+// CHECK: s20SIMDMaskInitializers10concat16x8ys0A0Vys6SIMD32Vys4Int8VGGADys6SIMD16VyAHGG_ANtF:
+// CHECKO-arm64-NEXT: ret
+
+func concat4x16(_ low: SIMDMask<SIMD4<Int16>>, _ high: SIMDMask<SIMD4<Int16>>) -> SIMDMask<SIMD8<Int16>> {
+  SIMDMask(lowHalf: low, highHalf: high)
+}
+// CHECK: s20SIMDMaskInitializers10concat4x16ys0A0Vys5SIMD8Vys5Int16VGGADys5SIMD4VyAHGG_ANtF:
+// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
+// CHECKO-arm64-NEXT: ret
+// CHECKO-x86_64: punpcklqdq
+
+func concat8x16(_ low: SIMDMask<SIMD8<Int16>>, _ high: SIMDMask<SIMD8<Int16>>) -> SIMDMask<SIMD16<Int16>> {
+  SIMDMask(lowHalf: low, highHalf: high)
+}
+// CHECK: s20SIMDMaskInitializers10concat8x16ys0A0Vys6SIMD16Vys5Int16VGGADys5SIMD8VyAHGG_ANtF:
+// CHECKO-arm64-NEXT: ret
+
+func concat2x32(_ low: SIMDMask<SIMD2<Int32>>, _ high: SIMDMask<SIMD2<Int32>>) -> SIMDMask<SIMD4<Int32>> {
+  SIMDMask(lowHalf: low, highHalf: high)
+}
+// CHECK: s20SIMDMaskInitializers10concat2x32ys0A0Vys5SIMD4Vys5Int32VGGADys5SIMD2VyAHGG_ANtF:
+// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
+// CHECKO-arm64-NEXT: ret
+// CHECKO-x86_64: punpcklqdq
+
+func concat4x32(_ low: SIMDMask<SIMD4<Int32>>, _ high: SIMDMask<SIMD4<Int32>>) -> SIMDMask<SIMD8<Int32>> {
+  SIMDMask(lowHalf: low, highHalf: high)
+}
+// CHECK: s20SIMDMaskInitializers10concat4x32ys0A0Vys5SIMD8Vys5Int32VGGADys5SIMD4VyAHGG_ANtF:
+// CHECKO-arm64-NEXT: ret
+
+func concat2x64(_ low: SIMDMask<SIMD2<Int64>>, _ high: SIMDMask<SIMD2<Int64>>) -> SIMDMask<SIMD4<Int64>> {
+  SIMDMask(lowHalf: low, highHalf: high)
+}
+// CHECK: s20SIMDMaskInitializers10concat2x64ys0A0Vys5SIMD4Vys5Int64VGGADys5SIMD2VyAHGG_ANtF:
+// CHECKO-arm64-NEXT: ret
--- a/test/stdlib/SIMDSignedInitializers.swift.gyb
+++ b/test/stdlib/SIMDSignedInitializers.swift.gyb
@@ -0,0 +1,84 @@
+//===--- SIMDSignedInitializers.swift.gyb ---------------------*- swift -*-===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2025 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+// RUN: %empty-directory(%t)
+// RUN: %gyb %s -o %t/SIMDSignedInitializers.swift
+// RUN: %target-swift-frontend -primary-file %t/SIMDSignedInitializers.swift -S | %FileCheck %t/SIMDSignedInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKOnone-%target-cpu
+// RUN: %target-swift-frontend -primary-file %t/SIMDSignedInitializers.swift -S -O | %FileCheck %t/SIMDSignedInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKO-%target-cpu
+
+import Swift
+
+%for bits in [8,16,32,64]:
+% for totalBits in [64,128]:
+%  n = totalBits // bits
+%  if n != 1:
+%   neonSuffix = str(n) + {8:'b',16:'h',32:'s',64:'d'}[bits]
+
+func repeating${n}_int${bits}(_ value: Int${bits}) -> SIMD${n}<Int${bits}> {
+  SIMD${n}(repeating: value)
+}
+// CHECK: repeating${n}_int${bits}{{[[:alnum:]]+}}:
+// CHECKO-arm64-NEXT: dup.${neonSuffix} v0, {{[wx]}}0
+// CHECKO-arm64-NEXT: ret
+// CHECKOnone-arm64: dup.${neonSuffix}
+// CHECKOnone-arm64: ret
+%  end
+% end
+%end
+
+func concat8x8(_ low: SIMD8<Int8>, _ high: SIMD8<Int8>) -> SIMD16<Int8> {
+  SIMD16(lowHalf: low, highHalf: high)
+}
+// CHECK: s22SIMDSignedInitializers9concat8x8ys6SIMD16Vys4Int8VGs5SIMD8VyAFG_AJtF:
+// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
+// CHECKO-arm64-NEXT: ret
+// CHECKO-x86_64: punpcklqdq
+
+func concat16x8(_ low: SIMD16<Int8>, _ high: SIMD16<Int8>) -> SIMD32<Int8> {
+  SIMD32(lowHalf: low, highHalf: high)
+}
+// CHECK: s22SIMDSignedInitializers10concat16x8ys6SIMD32Vys4Int8VGs6SIMD16VyAFG_AJtF:
+// CHECKO-arm64-NEXT: ret
+
+func concat4x16(_ low: SIMD4<Int16>, _ high: SIMD4<Int16>) -> SIMD8<Int16> {
+  SIMD8(lowHalf: low, highHalf: high)
+}
+// CHECK: s22SIMDSignedInitializers10concat4x16ys5SIMD8Vys5Int16VGs5SIMD4VyAFG_AJtF:
+// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
+// CHECKO-arm64-NEXT: ret
+// CHECKO-x86_64: punpcklqdq
+
+func concat8x16(_ low: SIMD8<Int16>, _ high: SIMD8<Int16>) -> SIMD16<Int16> {
+  SIMD16(lowHalf: low, highHalf: high)
+}
+// CHECK: s22SIMDSignedInitializers10concat8x16ys6SIMD16Vys5Int16VGs5SIMD8VyAFG_AJtF:
+// CHECKO-arm64-NEXT: ret
+
+func concat2x32(_ low: SIMD2<Int32>, _ high: SIMD2<Int32>) -> SIMD4<Int32> {
+  SIMD4(lowHalf: low, highHalf: high)
+}
+// CHECK: s22SIMDSignedInitializers10concat2x32ys5SIMD4Vys5Int32VGs5SIMD2VyAFG_AJtF:
+// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
+// CHECKO-arm64-NEXT: ret
+// CHECKO-x86_64: punpcklqdq
+
+func concat4x32(_ low: SIMD4<Int32>, _ high: SIMD4<Int32>) -> SIMD8<Int32> {
+  SIMD8(lowHalf: low, highHalf: high)
+}
+// CHECK: s22SIMDSignedInitializers10concat4x32ys5SIMD8Vys5Int32VGs5SIMD4VyAFG_AJtF:
+// CHECKO-arm64-NEXT: ret
+
+func concat2x64(_ low: SIMD2<Int64>, _ high: SIMD2<Int64>) -> SIMD4<Int64> {
+  SIMD4(lowHalf: low, highHalf: high)
+}
+// CHECK: s22SIMDSignedInitializers10concat2x64ys5SIMD4Vys5Int64VGs5SIMD2VyAFG_AJtF:
+// CHECKO-arm64-NEXT: ret
+
--- a/test/stdlib/SIMDUnsignedInitializers.swift.gyb
+++ b/test/stdlib/SIMDUnsignedInitializers.swift.gyb
@@ -0,0 +1,83 @@
+//===--- SIMDUnsignedInitializers.swift.gyb -------------------*- swift -*-===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2025 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
+//
+//===----------------------------------------------------------------------===//
+// RUN: %empty-directory(%t)
+// RUN: %gyb %s -o %t/SIMDUnsignedInitializers.swift
+// RUN: %target-swift-frontend -primary-file %t/SIMDUnsignedInitializers.swift -S | %FileCheck %t/SIMDUnsignedInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKOnone-%target-cpu
+// RUN: %target-swift-frontend -primary-file %t/SIMDUnsignedInitializers.swift -S -O | %FileCheck %t/SIMDUnsignedInitializers.swift --check-prefix=CHECK --check-prefix=CHECK-%target-cpu --check-prefix=CHECKO-%target-cpu
+
+import Swift
+
+%for bits in [8,16,32,64]:
+% for totalBits in [64,128]:
+%  n = totalBits // bits
+%  if n != 1:
+%   neonSuffix = str(n) + {8:'b',16:'h',32:'s',64:'d'}[bits]
+
+func repeating${n}_uint${bits}(_ value: UInt${bits}) -> SIMD${n}<UInt${bits}> {
+  SIMD${n}(repeating: value)
+}
+// CHECK: repeating${n}_uint${bits}{{[[:alnum:]]+}}:
+// CHECKO-arm64-NEXT: dup.${neonSuffix} v0, {{[wx]}}0
+// CHECKO-arm64-NEXT: ret
+// CHECKOnone-arm64: dup.${neonSuffix}
+// CHECKOnone-arm64: ret
+%  end
+% end
+%end
+
+func concat8x8(_ low: SIMD8<UInt8>, _ high: SIMD8<UInt8>) -> SIMD16<UInt8> {
+  SIMD16(lowHalf: low, highHalf: high)
+}
+// CHECK: s24SIMDUnsignedInitializers9concat8x8ys6SIMD16Vys5UInt8VGs5SIMD8VyAFG_AJtF:
+// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
+// CHECKO-arm64-NEXT: ret
+// CHECKO-x86_64: punpcklqdq
+
+func concat16x8(_ low: SIMD16<UInt8>, _ high: SIMD16<UInt8>) -> SIMD32<UInt8> {
+  SIMD32(lowHalf: low, highHalf: high)
+}
+// CHECK: s24SIMDUnsignedInitializers10concat16x8ys6SIMD32Vys5UInt8VGs6SIMD16VyAFG_AJtF:
+// CHECKO-arm64-NEXT: ret
+
+func concat4x16(_ low: SIMD4<UInt16>, _ high: SIMD4<UInt16>) -> SIMD8<UInt16> {
+  SIMD8(lowHalf: low, highHalf: high)
+}
+// CHECK: s24SIMDUnsignedInitializers10concat4x16ys5SIMD8Vys6UInt16VGs5SIMD4VyAFG_AJtF:
+// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
+// CHECKO-arm64-NEXT: ret
+// CHECKO-x86_64: punpcklqdq
+
+func concat8x16(_ low: SIMD8<UInt16>, _ high: SIMD8<UInt16>) -> SIMD16<UInt16> {
+  SIMD16(lowHalf: low, highHalf: high)
+}
+// CHECK: s24SIMDUnsignedInitializers10concat8x16ys6SIMD16Vys6UInt16VGs5SIMD8VyAFG_AJtF:
+// CHECKO-arm64-NEXT: ret
+
+func concat2x32(_ low: SIMD2<UInt32>, _ high: SIMD2<UInt32>) -> SIMD4<UInt32> {
+  SIMD4(lowHalf: low, highHalf: high)
+}
+// CHECK: s24SIMDUnsignedInitializers10concat2x32ys5SIMD4Vys6UInt32VGs5SIMD2VyAFG_AJtF:
+// CHECKO-arm64-NEXT: mov.d v0[1], v1[0]
+// CHECKO-arm64-NEXT: ret
+// CHECKO-x86_64: punpcklqdq
+
+func concat4x32(_ low: SIMD4<UInt32>, _ high: SIMD4<UInt32>) -> SIMD8<UInt32> {
+  SIMD8(lowHalf: low, highHalf: high)
+}
+// CHECK: s24SIMDUnsignedInitializers10concat4x32ys5SIMD8Vys6UInt32VGs5SIMD4VyAFG_AJtF:
+// CHECKO-arm64-NEXT: ret
+
+func concat2x64(_ low: SIMD2<UInt64>, _ high: SIMD2<UInt64>) -> SIMD4<UInt64> {
+  SIMD4(lowHalf: low, highHalf: high)
+}
+// CHECK: s24SIMDUnsignedInitializers10concat2x64ys5SIMD4Vys6UInt64VGs5SIMD2VyAFG_AJtF:
+// CHECKO-arm64-NEXT: ret
--- a/utils/SwiftIntTypes.py
+++ b/utils/SwiftIntTypes.py
@@ -72,6 +72,16 @@ def all_integer_types(word_bits):
            is_word=True, bits=word_bits,
            is_signed=is_signed)

+def all_signed_types(word_bits):
+    for bitwidth in _all_integer_type_bitwidths:
+        yield SwiftIntegerType(
+            is_word=False, bits=bitwidth,
+            is_signed=True)
+    
+    yield SwiftIntegerType(
+        is_word=True, bits=word_bits,
+        is_signed=True)
+
 # 'truncatingBitPattern' initializer is defined if the conversion is truncating
 # on any platform that Swift supports.