//===--- SIMDReduceInteger.swift ------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2021 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

import TestsUtils

public let benchmarks = [
  BenchmarkInfo(
    name: "SIMDReduce.Int32",
    runFunction: run_SIMDReduceInt32x1,
    tags: [.validation, .SIMD],
    setUpFunction: { blackHole(int32Data) }
  ),
  BenchmarkInfo(
    name: "SIMDReduce.Int32x4.Initializer",
    runFunction: run_SIMDReduceInt32x4_init,
    tags: [.validation, .SIMD],
    setUpFunction: { blackHole(int32Data) }
  ),
  BenchmarkInfo(
    name: "SIMDReduce.Int32x4.Cast",
    runFunction: run_SIMDReduceInt32x4_cast,
    tags: [.validation, .SIMD],
    setUpFunction: { blackHole(int32Data) }
  ),
  BenchmarkInfo(
    name: "SIMDReduce.Int32x16.Initializer",
    runFunction: run_SIMDReduceInt32x16_init,
    tags: [.validation, .SIMD],
    setUpFunction: { blackHole(int32Data) }
  ),
  BenchmarkInfo(
    name: "SIMDReduce.Int32x16.Cast",
    runFunction: run_SIMDReduceInt32x16_cast,
    tags: [.validation, .SIMD],
    setUpFunction: { blackHole(int32Data) }
  ),
  BenchmarkInfo(
    name: "SIMDReduce.Int8",
    runFunction: run_SIMDReduceInt8x1,
    tags: [.validation, .SIMD],
    setUpFunction: { blackHole(int8Data) }
  ),
  BenchmarkInfo(
    name: "SIMDReduce.Int8x16.Initializer",
    runFunction: run_SIMDReduceInt8x16_init,
    tags: [.validation, .SIMD],
    setUpFunction: { blackHole(int8Data) }
  ),
  BenchmarkInfo(
    name: "SIMDReduce.Int8x16.Cast",
    runFunction: run_SIMDReduceInt8x16_cast,
    tags: [.validation, .SIMD],
    setUpFunction: { blackHole(int8Data) }
  ),
  BenchmarkInfo(
    name: "SIMDReduce.Int8x64.Initializer",
    runFunction: run_SIMDReduceInt8x64_init,
    tags: [.validation, .SIMD],
    setUpFunction: { blackHole(int8Data) }
  ),
  BenchmarkInfo(
    name: "SIMDReduce.Int8x64.Cast",
    runFunction: run_SIMDReduceInt8x64_cast,
    tags: [.validation, .SIMD],
    setUpFunction: { blackHole(int8Data) }
  )
]
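
// Each element type is measured with a scalar loop and, for each vector
// width, an "Initializer" variant that builds vectors from buffer slices
// and a "Cast" variant that reinterprets the buffer's memory as vectors.
// Every entry's setUpFunction touches its data buffer through blackHole so
// the lazy global is initialized before measurement begins.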

// TODO: use 100 for Onone?
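// `scale` multiplies the iteration count `n` passed to each run function
// (every benchmark loops over `0 ..< scale*n`) so that one invocation does
// a measurable amount of work.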
let scale = 1000

let int32Data: UnsafeBufferPointer<Int32> = {
  let count = 256
  // Allocate memory for `count` Int32s with alignment suitable for all
  // SIMD vector types.
  let untyped = UnsafeMutableRawBufferPointer.allocate(
    byteCount: MemoryLayout<Int32>.size * count, alignment: 16
  )
  // Initialize the memory as Int32 and fill with random values.
  let typed = untyped.initializeMemory(as: Int32.self, repeating: 0)
  var g = SplitMix64(seed: 0)
  for i in 0 ..< typed.count {
    typed[i] = .random(in: .min ... .max, using: &g)
  }
  return UnsafeBufferPointer(typed)
}()

@inline(never)
public func run_SIMDReduceInt32x1(_ n: Int) {
  for _ in 0 ..< scale*n {
    var accum: Int32 = 0
    for v in int32Data {
      accum &+= v &* v
    }
    blackHole(accum)
  }
}

@inline(never)
public func run_SIMDReduceInt32x4_init(_ n: Int) {
  for _ in 0 ..< scale*n {
    var accum = SIMD4<Int32>()
    for i in stride(from: 0, to: int32Data.count, by: 4) {
      let v = SIMD4(int32Data[i ..< i+4])
      accum &+= v &* v
    }
    blackHole(accum.wrappedSum())
  }
}

@inline(never)
public func run_SIMDReduceInt32x4_cast(_ n: Int) {
  // Morally it seems like we "should" be able to use withMemoryRebound
  // to SIMD4<Int32>, but that function requires that the sizes match in
  // debug builds, so this is pretty ugly. The following "works" for now,
  // but is probably in violation of the formal model (the exact rules
  // for "assumingMemoryBound" are not clearly documented). We need a
  // better solution.
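  //
  // For reference, a sketch of the withMemoryRebound shape described above
  // (illustrative only; not used because the element sizes do not match):
  //
  //   int32Data.withMemoryRebound(to: SIMD4<Int32>.self) { vecs in
  //     var accum = SIMD4<Int32>()
  //     for v in vecs { accum &+= v &* v }
  //     blackHole(accum.wrappedSum())
  //   }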
  let vecs = UnsafeBufferPointer<SIMD4<Int32>>(
    start: UnsafeRawPointer(int32Data.baseAddress!).assumingMemoryBound(to: SIMD4<Int32>.self),
    count: int32Data.count / 4
  )
  for _ in 0 ..< scale*n {
    var accum = SIMD4<Int32>()
    for v in vecs {
      accum &+= v &* v
    }
    blackHole(accum.wrappedSum())
  }
}

@inline(never)
public func run_SIMDReduceInt32x16_init(_ n: Int) {
  for _ in 0 ..< scale*n {
    var accum = SIMD16<Int32>()
    for i in stride(from: 0, to: int32Data.count, by: 16) {
      let v = SIMD16(int32Data[i ..< i+16])
      accum &+= v &* v
    }
    blackHole(accum.wrappedSum())
  }
}

@inline(never)
public func run_SIMDReduceInt32x16_cast(_ n: Int) {
  let vecs = UnsafeBufferPointer<SIMD16<Int32>>(
    start: UnsafeRawPointer(int32Data.baseAddress!).assumingMemoryBound(to: SIMD16<Int32>.self),
    count: int32Data.count / 16
  )
  for _ in 0 ..< scale*n {
    var accum = SIMD16<Int32>()
    for v in vecs {
      accum &+= v &* v
    }
    blackHole(accum.wrappedSum())
  }
}

let int8Data: UnsafeBufferPointer<Int8> = {
  let count = 1024
  // Allocate memory for `count` Int8s with alignment suitable for all
  // SIMD vector types.
  let untyped = UnsafeMutableRawBufferPointer.allocate(
    byteCount: MemoryLayout<Int8>.size * count, alignment: 16
  )
  // Initialize the memory as Int8 and fill with random values.
  let typed = untyped.initializeMemory(as: Int8.self, repeating: 0)
  var g = SplitMix64(seed: 0)
  for i in 0 ..< typed.count {
    typed[i] = .random(in: .min ... .max, using: &g)
  }
  return UnsafeBufferPointer(typed)
}()

@inline(never)
public func run_SIMDReduceInt8x1(_ n: Int) {
  for _ in 0 ..< scale*n {
    var accum: Int8 = 0
    for v in int8Data {
      accum &+= v &* v
    }
    blackHole(accum)
  }
}

@inline(never)
public func run_SIMDReduceInt8x16_init(_ n: Int) {
  for _ in 0 ..< scale*n {
    var accum = SIMD16<Int8>()
    for i in stride(from: 0, to: int8Data.count, by: 16) {
      let v = SIMD16(int8Data[i ..< i+16])
      accum &+= v &* v
    }
    blackHole(accum.wrappedSum())
  }
}

@inline(never)
public func run_SIMDReduceInt8x16_cast(_ n: Int) {
  let vecs = UnsafeBufferPointer<SIMD16<Int8>>(
    start: UnsafeRawPointer(int8Data.baseAddress!).assumingMemoryBound(to: SIMD16<Int8>.self),
    count: int8Data.count / 16
  )
  for _ in 0 ..< scale*n {
    var accum = SIMD16<Int8>()
    for v in vecs {
      accum &+= v &* v
    }
    blackHole(accum.wrappedSum())
  }
}

@inline(never)
public func run_SIMDReduceInt8x64_init(_ n: Int) {
  for _ in 0 ..< scale*n {
    var accum = SIMD64<Int8>()
    for i in stride(from: 0, to: int8Data.count, by: 64) {
      let v = SIMD64(int8Data[i ..< i+64])
      accum &+= v &* v
    }
    blackHole(accum.wrappedSum())
  }
}

@inline(never)
public func run_SIMDReduceInt8x64_cast(_ n: Int) {
  let vecs = UnsafeBufferPointer<SIMD64<Int8>>(
    start: UnsafeRawPointer(int8Data.baseAddress!).assumingMemoryBound(to: SIMD64<Int8>.self),
    count: int8Data.count / 64
  )
  for _ in 0 ..< scale*n {
    var accum = SIMD64<Int8>()
    for v in vecs {
      accum &+= v &* v
    }
    blackHole(accum.wrappedSum())
  }
}