Files
linux-nvgpu/userspace/units/interface/atomic/atomic.c
2025-12-19 15:25:44 -08:00

1513 lines
39 KiB
C

/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <stdlib.h> /* for abs() */
#include <unit/unit.h>
#include <unit/io.h>
#include <unit/unit-requirement-ids.h>
#include <nvgpu/atomic.h>
#include <nvgpu/bug.h>
#include "atomic.h"
/*
 * Holds one counter of each flavour exercised by these tests. The func_*()
 * helpers below operate on the member that matches the enum atomic_type they
 * are given.
 */
struct atomic_struct {
long not_atomic; /* plain long, used when the type is NOT_ATOMIC */
nvgpu_atomic_t atomic; /* used when the type is ATOMIC_32 */
nvgpu_atomic64_t atomic64; /* used when the type is ATOMIC_64 */
};
/* Selects which member of struct atomic_struct a func_*() helper acts on. */
enum atomic_type {
ATOMIC_32, /* the nvgpu_atomic_t member */
ATOMIC_64, /* the nvgpu_atomic64_t member */
NOT_ATOMIC, /* the plain long member; threaded tests invert its result */
};
/*
 * Operation a test case exercises. ATOMIC_OP_SIGN() treats op_dec, op_sub,
 * op_dec_and_test and op_sub_and_test as negative and every other op as
 * positive.
 */
enum atomic_op {
op_inc,
op_dec,
op_add,
op_sub,
op_inc_and_test,
op_dec_and_test,
op_sub_and_test,
op_add_unless,
op_cmpxchg,
};
/*
 * Parameters describing one test case. The test functions receive a pointer
 * to one of these through their void *__args parameter.
 */
struct atomic_test_args {
enum atomic_op op; /* operation under test */
enum atomic_type type; /* counter flavour to operate on */
long start_val; /* initial counter value for the tests that read it */
unsigned long loop_count; /* loop bound for applying the op */
unsigned long value; /* absolute value */
unsigned int repeat_count; /* How many times to repeat a test; only
 * applies to threaded tests. The repeating
 * tests loop with "while (repeat-- > 0)", so
 * they run repeat_count + 1 rounds.
 */
};
/* Per-thread bookkeeping handed to the worker threads of the threaded tests. */
struct atomic_thread_info {
struct atomic_struct *atomic; /* counter shared by all threads of a test */
struct atomic_test_args *margs; /* arguments of the running test */
pthread_t thread; /* handle filled in by pthread_create() */
unsigned int thread_num; /* index of this thread in the test's array */
unsigned int iterations; /* count of ops that reported the tracked event */
long final_val; /* last func_add_return() result (op_add only) */
long final_expected_val; /* not referenced in the code visible here */
long xchg_val; /* value the xchg thread currently holds */
long unless; /* "unless" value passed to func_add_unless() */
};
/* Barrier the threaded tests use to line up their worker threads. */
static pthread_barrier_t thread_barrier;
/*
 * Set by the main test thread and read by the looping workers right after
 * they pass the barrier; true tells them to exit.
 */
bool stop_threads;
/*
* Define functions for atomic ops that handle all types so we can
* keep the code cleaner.
*/
/*
 * Store val into the counter of ref selected by type. An unrecognised type
 * leaves ref untouched.
 */
static inline void func_set(enum atomic_type type, struct atomic_struct *ref,
			    long val)
{
	if (type == NOT_ATOMIC) {
		ref->not_atomic = val;
	} else if (type == ATOMIC_32) {
		nvgpu_atomic_set(&ref->atomic, val);
	} else if (type == ATOMIC_64) {
		nvgpu_atomic64_set(&ref->atomic64, val);
	}
}
/*
 * Return the current value of the counter of ref selected by type, or 0 for
 * an unrecognised type.
 */
static inline long func_read(enum atomic_type type, struct atomic_struct *ref)
{
	if (type == NOT_ATOMIC) {
		return ref->not_atomic;
	}
	if (type == ATOMIC_32) {
		return nvgpu_atomic_read(&ref->atomic);
	}
	if (type == ATOMIC_64) {
		return nvgpu_atomic64_read(&ref->atomic64);
	}
	return 0;
}
/* Add one to the counter of ref selected by type. */
static inline void func_inc(enum atomic_type type, struct atomic_struct *ref)
{
	if (type == NOT_ATOMIC) {
		++ref->not_atomic;
	} else if (type == ATOMIC_32) {
		nvgpu_atomic_inc(&ref->atomic);
	} else if (type == ATOMIC_64) {
		nvgpu_atomic64_inc(&ref->atomic64);
	}
}
/*
 * Add one to the counter of ref selected by type and return the value the
 * operation reports. Returns 0 for an unrecognised type.
 */
static inline long func_inc_return(enum atomic_type type,
				   struct atomic_struct *ref)
{
	long result = 0;

	if (type == NOT_ATOMIC) {
		++ref->not_atomic;
		result = ref->not_atomic;
	} else if (type == ATOMIC_32) {
		result = nvgpu_atomic_inc_return(&ref->atomic);
	} else if (type == ATOMIC_64) {
		result = nvgpu_atomic64_inc_return(&ref->atomic64);
	}

	return result;
}
/*
 * Add one to the counter of ref selected by type and return the op's boolean
 * result; for NOT_ATOMIC that is whether the counter is now 0. Returns false
 * for an unrecognised type.
 */
static inline bool func_inc_and_test(enum atomic_type type,
				     struct atomic_struct *ref)
{
	bool is_zero = false;

	if (type == NOT_ATOMIC) {
		++ref->not_atomic;
		is_zero = (ref->not_atomic == 0);
	} else if (type == ATOMIC_32) {
		is_zero = nvgpu_atomic_inc_and_test(&ref->atomic);
	} else if (type == ATOMIC_64) {
		is_zero = nvgpu_atomic64_inc_and_test(&ref->atomic64);
	}

	return is_zero;
}
/* Subtract one from the counter of ref selected by type. */
static inline void func_dec(enum atomic_type type, struct atomic_struct *ref)
{
	if (type == NOT_ATOMIC) {
		--ref->not_atomic;
	} else if (type == ATOMIC_32) {
		nvgpu_atomic_dec(&ref->atomic);
	} else if (type == ATOMIC_64) {
		nvgpu_atomic64_dec(&ref->atomic64);
	}
}
/*
 * Subtract one from the counter of ref selected by type and return the value
 * the operation reports. Returns 0 for an unrecognised type.
 */
static inline long func_dec_return(enum atomic_type type,
				   struct atomic_struct *ref)
{
	long result = 0;

	if (type == NOT_ATOMIC) {
		--ref->not_atomic;
		result = ref->not_atomic;
	} else if (type == ATOMIC_32) {
		result = nvgpu_atomic_dec_return(&ref->atomic);
	} else if (type == ATOMIC_64) {
		result = nvgpu_atomic64_dec_return(&ref->atomic64);
	}

	return result;
}
/*
 * Subtract one from the counter of ref selected by type and return the op's
 * boolean result; for NOT_ATOMIC that is whether the counter is now 0.
 * Returns false for an unrecognised type.
 */
static inline bool func_dec_and_test(enum atomic_type type,
				     struct atomic_struct *ref)
{
	bool is_zero = false;

	if (type == NOT_ATOMIC) {
		--ref->not_atomic;
		is_zero = (ref->not_atomic == 0);
	} else if (type == ATOMIC_32) {
		is_zero = nvgpu_atomic_dec_and_test(&ref->atomic);
	} else if (type == ATOMIC_64) {
		is_zero = nvgpu_atomic64_dec_and_test(&ref->atomic64);
	}

	return is_zero;
}
/* Add val to the counter of ref selected by type. */
static inline void func_add(enum atomic_type type, long val,
			    struct atomic_struct *ref)
{
	if (type == NOT_ATOMIC) {
		ref->not_atomic += val;
	} else if (type == ATOMIC_32) {
		nvgpu_atomic_add(val, &ref->atomic);
	} else if (type == ATOMIC_64) {
		nvgpu_atomic64_add(val, &ref->atomic64);
	}
}
/*
 * Add val to the counter of ref selected by type and return the value the
 * operation reports. Returns 0 for an unrecognised type.
 */
static inline long func_add_return(enum atomic_type type, long val,
				   struct atomic_struct *ref)
{
	long result = 0;

	if (type == NOT_ATOMIC) {
		ref->not_atomic += val;
		result = ref->not_atomic;
	} else if (type == ATOMIC_32) {
		result = nvgpu_atomic_add_return(val, &ref->atomic);
	} else if (type == ATOMIC_64) {
		result = nvgpu_atomic64_add_return(val, &ref->atomic64);
	}

	return result;
}
/*
 * Add val to the counter of ref selected by type unless the counter equals
 * "unless". For NOT_ATOMIC the value seen before the (possible) add is
 * returned; the atomic variants return whatever the nvgpu op returns.
 * Returns 0 for an unrecognised type.
 */
static inline long func_add_unless(enum atomic_type type,
				   struct atomic_struct *ref, long val,
				   long unless)
{
	long seen = 0;

	if (type == NOT_ATOMIC) {
		seen = ref->not_atomic;
		if (seen != unless) {
			ref->not_atomic += val;
		}
	} else if (type == ATOMIC_32) {
		seen = nvgpu_atomic_add_unless(&ref->atomic, val, unless);
	} else if (type == ATOMIC_64) {
		seen = nvgpu_atomic64_add_unless(&ref->atomic64, val, unless);
	}

	return seen;
}
/* Subtract val from the counter of ref selected by type. */
static inline void func_sub(enum atomic_type type, long val,
			    struct atomic_struct *ref)
{
	if (type == NOT_ATOMIC) {
		ref->not_atomic -= val;
	} else if (type == ATOMIC_32) {
		nvgpu_atomic_sub(val, &ref->atomic);
	} else if (type == ATOMIC_64) {
		nvgpu_atomic64_sub(val, &ref->atomic64);
	}
}
/*
 * Subtract val from the counter of ref selected by type and return the value
 * the operation reports. Returns 0 for an unrecognised type.
 */
static inline long func_sub_return(enum atomic_type type, long val,
				   struct atomic_struct *ref)
{
	long result = 0;

	if (type == NOT_ATOMIC) {
		ref->not_atomic -= val;
		result = ref->not_atomic;
	} else if (type == ATOMIC_32) {
		result = nvgpu_atomic_sub_return(val, &ref->atomic);
	} else if (type == ATOMIC_64) {
		result = nvgpu_atomic64_sub_return(val, &ref->atomic64);
	}

	return result;
}
/*
 * Subtract val from the counter of ref selected by type and return the op's
 * boolean result; for NOT_ATOMIC that is whether the counter is now 0.
 * Returns false for an unrecognised type.
 */
static inline bool func_sub_and_test(enum atomic_type type, long val,
				     struct atomic_struct *ref)
{
	bool is_zero = false;

	if (type == NOT_ATOMIC) {
		ref->not_atomic -= val;
		is_zero = (ref->not_atomic == 0);
	} else if (type == ATOMIC_32) {
		is_zero = nvgpu_atomic_sub_and_test(val, &ref->atomic);
	} else if (type == ATOMIC_64) {
		is_zero = nvgpu_atomic64_sub_and_test(val, &ref->atomic64);
	}

	return is_zero;
}
/*
 * Replace the counter of ref selected by type with "new". For NOT_ATOMIC the
 * previous value is returned; the atomic variants return whatever the nvgpu
 * op returns. Returns 0 for an unrecognised type.
 */
static inline long func_xchg(enum atomic_type type, struct atomic_struct *ref,
			     long new)
{
	long previous = 0;

	if (type == NOT_ATOMIC) {
		previous = ref->not_atomic;
		ref->not_atomic = new;
	} else if (type == ATOMIC_32) {
		previous = nvgpu_atomic_xchg(&ref->atomic, new);
	} else if (type == ATOMIC_64) {
		previous = nvgpu_atomic64_xchg(&ref->atomic64, new);
	}

	return previous;
}
/*
 * Compare-and-exchange on the counter of ref selected by type: store "new"
 * only if the counter currently equals "old". For NOT_ATOMIC the value seen
 * before the (possible) store is returned; the atomic variants return
 * whatever the nvgpu op returns. Returns 0 for an unrecognised type.
 */
static inline long func_cmpxchg(enum atomic_type type,
				struct atomic_struct *ref, long old, long new)
{
	long seen = 0;

	if (type == NOT_ATOMIC) {
		seen = ref->not_atomic;
		if (seen == old) {
			ref->not_atomic = new;
		}
	} else if (type == ATOMIC_32) {
		seen = nvgpu_atomic_cmpxchg(&ref->atomic, old, new);
	} else if (type == ATOMIC_64) {
		seen = nvgpu_atomic64_cmpxchg(&ref->atomic64, old, new);
	}

	return seen;
}
/*
 * Helper macro that takes an atomic op from the enum and evaluates to +1/-1
 * (as a long) to help with arithmetic: -1 for op_dec, op_sub,
 * op_dec_and_test and op_sub_and_test, +1 for every other op.
 *
 * Implemented as a statement expression ("({ ... })"), which is a compiler
 * extension rather than standard C.
 */
#define ATOMIC_OP_SIGN(atomic_op) \
({ \
long sign; \
switch (atomic_op) { \
case op_dec: \
case op_sub: \
case op_dec_and_test: \
case op_sub_and_test: \
sign = -1; \
break; \
default: \
sign = 1; \
} \
sign; \
})
/*
 * For the non-atomic case, we usually have to invert success/failure:
 * UNIT_FAIL becomes UNIT_SUCCESS and any other result becomes UNIT_FAIL.
 */
#define INVERTED_RESULT(result) \
(((result) == UNIT_FAIL) ? UNIT_SUCCESS : UNIT_FAIL)
/*
 * Support function: write set_val to the counter selected by type and verify
 * that reading it back yields the same value.
 *
 * For ATOMIC_32 a set_val outside [INT_MIN, INT_MAX] is rejected through
 * unit_return_fail() (expected to log and return UNIT_FAIL).
 *
 * Returns UNIT_SUCCESS when the read matches, UNIT_FAIL otherwise.
 */
static int single_set_and_read(struct unit_module *m,
			       struct atomic_struct *atomic,
			       enum atomic_type type, const long set_val)
{
	const bool outside_int_range =
		(set_val < INT_MIN) || (set_val > INT_MAX);
	long readback;

	if ((type == ATOMIC_32) && outside_int_range) {
		unit_return_fail(m, "Invalid value for 32 op\n");
	}

	func_set(type, atomic, set_val);
	readback = func_read(type, atomic);
	if (readback == set_val) {
		return UNIT_SUCCESS;
	}

	unit_err(m, "Atomic returned wrong value. Expected: %ld "
		"Received: %ld\n", (long)set_val, (long)readback);
	return UNIT_FAIL;
}
/*
 * Test that a set followed by a read returns the value written.
 *
 * Exercises the minimum and maximum value of the type under test, zero, and
 * every single-bit pattern across the type's width.
 *
 * m      - unit test module, used for error reporting
 * g      - not used by this test
 * __args - struct atomic_test_args *; only .type is read
 *
 * Returns UNIT_SUCCESS if every value reads back unchanged, UNIT_FAIL
 * otherwise.
 */
int test_atomic_set_and_read(struct unit_module *m,
			     struct gk20a *g, void *__args)
{
	struct atomic_test_args *args = (struct atomic_test_args *)__args;
	const bool is_32 = (args->type == ATOMIC_32);
	const unsigned int loop_limit = is_32 ?
		(sizeof(int) * 8) : (sizeof(long) * 8);
	const long edge_vals[] = {
		is_32 ? INT_MIN : LONG_MIN,
		is_32 ? INT_MAX : LONG_MAX,
		0,
	};
	struct atomic_struct atomic = {0};
	unsigned int i;

	/* The boundary values must round-trip too; don't drop the result. */
	for (i = 0; i < (sizeof(edge_vals) / sizeof(edge_vals[0])); i++) {
		if (single_set_and_read(m, &atomic, args->type, edge_vals[i])
				!= UNIT_SUCCESS) {
			return UNIT_FAIL;
		}
	}

	for (i = 0; i < loop_limit; i++) {
		/*
		 * Build the single-bit pattern in an unsigned type wide
		 * enough for every iteration. Shifting the int constant 1 by
		 * 31 or more bits is undefined behaviour, so with a plain
		 * "1 << i" the upper half of a 64-bit value was never
		 * reliably exercised.
		 */
		const unsigned long bit = 1UL << i;
		long set_val;

		if (is_32) {
			/* bit 31 maps to INT_MIN, which stays in int range */
			set_val = (long)(int)(unsigned int)bit;
		} else {
			set_val = (long)bit;
		}

		if (single_set_and_read(m, &atomic, args->type, set_val)
				!= UNIT_SUCCESS) {
			return UNIT_FAIL;
		}
	}

	return UNIT_SUCCESS;
}
int test_atomic_arithmetic(struct unit_module *m,
struct gk20a *g, void *__args)
{
struct atomic_test_args *args = (struct atomic_test_args *)__args;
struct atomic_struct atomic = {0};
unsigned int i;
long delta_magnitude;
long read_val;
long expected_val;
bool result_bool;
bool check_result_bool = false;
if (single_set_and_read(m, &atomic, args->type, args->start_val)
!= UNIT_SUCCESS) {
return UNIT_FAIL;
}
for (i = 1; i <= args->loop_count; i++) {
if (args->op == op_inc) {
/* use 2 since we test both inc and inc_return */
delta_magnitude = 2;
func_inc(args->type, &atomic);
read_val = func_inc_return(args->type, &atomic);
} else if (args->op == op_inc_and_test) {
delta_magnitude = 1;
check_result_bool = true;
result_bool = func_inc_and_test(args->type, &atomic);
read_val = func_read(args->type, &atomic);
} else if (args->op == op_dec) {
/* use 2 since we test both dec and dec_return */
delta_magnitude = 2;
func_dec(args->type, &atomic);
read_val = func_dec_return(args->type, &atomic);
} else if (args->op == op_dec_and_test) {
delta_magnitude = 1;
check_result_bool = true;
result_bool = func_dec_and_test(args->type, &atomic);
read_val = func_read(args->type, &atomic);
} else if (args->op == op_add) {
delta_magnitude = args->value * 2;
func_add(args->type, args->value, &atomic);
read_val = func_add_return(args->type, args->value,
&atomic);
} else if (args->op == op_sub) {
delta_magnitude = args->value * 2;
func_sub(args->type, args->value, &atomic);
read_val = func_sub_return(args->type, args->value,
&atomic);
} else if (args->op == op_sub_and_test) {
delta_magnitude = args->value;
check_result_bool = true;
result_bool = func_sub_and_test(args->type,
args->value, &atomic);
read_val = func_read(args->type, &atomic);
} else {
unit_return_fail(m, "Test error: invalid op in %s\n",
__func__);
}
expected_val = args->start_val +
(i * delta_magnitude * ATOMIC_OP_SIGN(args->op));
/* sanity check */
if ((args->type == ATOMIC_32) &&
((expected_val > INT_MAX) || (expected_val < INT_MIN))) {
unit_return_fail(m, "Test error: invalid value in %s\n",
__func__);
}
if (read_val != expected_val) {
unit_return_fail(m, "Atomic returned wrong value. "
"Expected: %ld Received: %ld\n",
(long)expected_val, (long)read_val);
}
if (check_result_bool) {
if (((expected_val == 0) && !result_bool) ||
((expected_val != 0) && result_bool)) {
unit_return_fail(m,
"Test result incorrect\n");
}
}
}
return UNIT_SUCCESS;
}
/*
 * Increment the counter by one using only read + cmpxchg: retry until the
 * cmpxchg reports the same value that was just read.
 */
static void cmpxchg_inc(enum atomic_type type, struct atomic_struct *ref)
{
	long snapshot;

	do {
		snapshot = func_read(type, ref);
	} while (func_cmpxchg(type, ref, snapshot, snapshot + 1) != snapshot);
}
/*
* Support function that runs in the threads for the arithmetic threaded
* test below
*/
static void *arithmetic_thread(void *__args)
{
struct atomic_thread_info *targs = (struct atomic_thread_info *)__args;
unsigned int i;
pthread_barrier_wait(&thread_barrier);
for (i = 0; i < targs->margs->loop_count; i++) {
if (targs->margs->op == op_cmpxchg) {
/* special case with special function */
cmpxchg_inc(targs->margs->type, targs->atomic);
} else if (targs->margs->op == op_inc) {
func_inc(targs->margs->type, targs->atomic);
} else if (targs->margs->op == op_dec) {
func_dec(targs->margs->type, targs->atomic);
} else if (targs->margs->op == op_add) {
/*
* Save the last value to sanity that threads aren't
* running sequentially
*/
targs->final_val = func_add_return(
targs->margs->type,
targs->margs->value,
targs->atomic);
} else if (targs->margs->op == op_add) {
func_add(targs->margs->type, targs->margs->value,
targs->atomic);
} else if (targs->margs->op == op_sub) {
func_sub(targs->margs->type, targs->margs->value,
targs->atomic);
} else if (targs->margs->op == op_inc_and_test) {
if (func_inc_and_test(targs->margs->type,
targs->atomic)) {
/*
* Only increment if atomic op returns true
* (that the value is 0)
*/
targs->iterations++;
}
} else if (targs->margs->op == op_dec_and_test) {
if (func_dec_and_test(targs->margs->type,
targs->atomic)) {
/*
* Only increment if atomic op returns true
* (that the value is 0)
*/
targs->iterations++;
}
} else if (targs->margs->op == op_sub_and_test) {
if (func_sub_and_test(targs->margs->type,
targs->margs->value,
targs->atomic)) {
/*
* Only increment if atomic op returns true
* (that the value is 0)
*/
targs->iterations++;
}
} else if (targs->margs->op == op_add_unless) {
if (func_add_unless(targs->margs->type,
targs->atomic, targs->margs->value,
targs->unless) != targs->unless) {
/*
* Increment until the atomic value is the
* "unless" value.
*/
targs->iterations++;
}
} else {
/*
* Don't print an error here because it would print
* for each thread. The main thread will catch this.
*/
break;
}
}
return NULL;
}
/*
* Support function to make sure the threaded arithmetic tests ran the correct
* number of iterations across threads, if applicable.
*/
static bool correct_thread_iteration_count(struct unit_module *m,
struct atomic_thread_info *threads,
unsigned int num_threads,
long expected_iterations)
{
unsigned int i;
long total_iterations = 0;
for (i = 0; i < num_threads; i++) {
total_iterations += threads[i].iterations;
}
if (total_iterations != expected_iterations) {
unit_err(m, "threaded test op took unexpected number of "
"iterations expected %ld took: %ld\n",
expected_iterations, total_iterations);
return false;
}
return true;
}
/*
 * Threaded test of the arithmetic ops.
 *
 * Sets the counter to args->start_val, starts 100 threads running
 * arithmetic_thread() on the same counter, joins them, and then checks the
 * final counter value (and, for some ops, the summed per-thread iteration
 * counts) against the expected result.
 *
 * For the NOT_ATOMIC type the result is inverted on exit, i.e. the test
 * passes only when the checks below fail.
 *
 * m      - unit test module, used for error reporting
 * g      - not used by this test
 * __args - struct atomic_test_args * describing the case
 *
 * Returns UNIT_SUCCESS or UNIT_FAIL.
 */
int test_atomic_arithmetic_threaded(struct unit_module *m,
struct gk20a *g, void *__args)
{
struct atomic_test_args *args = (struct atomic_test_args *)__args;
struct atomic_struct atomic = {0};
const unsigned int num_threads = 100;
struct atomic_thread_info threads[num_threads];
unsigned int i;
long expected_val, val, expected_iterations;
int ret = UNIT_SUCCESS;
/* this early failure is returned as-is, even for NOT_ATOMIC */
if (single_set_and_read(m, &atomic, args->type, args->start_val)
!= UNIT_SUCCESS) {
return UNIT_FAIL;
}
/* only the worker threads wait on the barrier, not this thread */
pthread_barrier_init(&thread_barrier, NULL, num_threads);
/* setup and start threads */
/*
 * NOTE(review): pthread_create() results are not checked; a failed create
 * would leave the barrier short of participants. final_val is only written
 * by the threads for op_add.
 */
for (i = 0; i < num_threads; i++) {
threads[i].atomic = &atomic;
threads[i].margs = args;
threads[i].thread_num = i;
threads[i].iterations = 0;
/* For add_unless, add until we hit half the iterations */
threads[i].unless = args->start_val +
(num_threads * args->loop_count / 2);
pthread_create(&threads[i].thread, NULL, arithmetic_thread,
&threads[i]);
}
/* wait for all threads to complete */
for (i = 0; i < num_threads; i++) {
pthread_join(threads[i].thread, NULL);
}
val = func_read(args->type, &atomic);
switch (args->op) {
case op_add_unless:
/*
 * For add_unless, the threads increment their iteration
 * counts until the atomic reaches the unless value,
 * but continue calling the op in the loop to make sure
 * it doesn't actually add anymore.
 */
expected_iterations = (threads[0].unless -
args->start_val + 1) /
args->value;
if (!correct_thread_iteration_count(m, threads,
num_threads, expected_iterations)) {
ret = UNIT_FAIL;
goto exit;
}
expected_val = threads[0].unless;
break;
case op_inc_and_test:
case op_dec_and_test:
case op_sub_and_test:
/*
 * The threads only increment when the atomic op
 * reports that it hit 0 which should only happen once.
 */
if (!correct_thread_iteration_count(m, threads,
num_threads, 1)) {
ret = UNIT_FAIL;
goto exit;
}
/* fall through! */
case op_add:
case op_sub:
case op_inc:
case op_dec:
case op_cmpxchg:
/* every thread applied the op loop_count times with the same value */
expected_val = args->start_val +
(args->loop_count * num_threads *
ATOMIC_OP_SIGN(args->op) * args->value);
break;
default:
unit_err(m, "Test error: invalid op in %s\n", __func__);
ret = UNIT_FAIL;
goto exit;
}
/* sanity check */
if ((args->type == ATOMIC_32) &&
((expected_val > INT_MAX) || (expected_val < INT_MIN))) {
unit_err(m, "Test error: invalid value in %s\n", __func__);
ret = UNIT_FAIL;
goto exit;
}
if (val != expected_val) {
unit_err(m, "threaded value incorrect expected: %ld "
"result: %ld\n",
expected_val, val);
ret = UNIT_FAIL;
goto exit;
}
if (args->op == op_add) {
/* sanity test that the threads aren't all sequential */
/*
 * NOTE(review): neighbouring final values are compared against loop_count
 * without scaling by args->value; confirm this is intended for add values
 * other than 1.
 */
bool sequential = true;
for (i = 0; i < (num_threads - 1); i++) {
if (labs(threads[i].final_val - threads[i+1].final_val)
!= (long)args->loop_count) {
sequential = false;
break;
}
}
if (sequential) {
unit_err(m, "threads appear to have run "
"sequentially!\n");
ret = UNIT_FAIL;
goto exit;
}
}
exit:
pthread_barrier_destroy(&thread_barrier);
if (args->type == NOT_ATOMIC) {
/* For the non-atomics, pass is fail and fail is pass */
return INVERTED_RESULT(ret);
} else {
return ret;
}
}
/*
* Thread function for the test_atomic_arithmetic_and_test_threaded() test.
* Calls the *_and_inc_test op once and saves whether the op returned true by
* incrementing in the iterations thread struct.
*/
static void *arithmetic_and_test_updater_thread(void *__args)
{
struct atomic_thread_info *targs = (struct atomic_thread_info *)__args;
struct atomic_struct *atomic_p = targs->atomic;
bool is_zero;
unsigned int i;
while (true) {
/* wait here to start */
pthread_barrier_wait(&thread_barrier);
if (stop_threads) {
return NULL;
}
for (i = 0; i < targs->margs->loop_count; i++) {
switch (targs->margs->op) {
case op_inc_and_test:
is_zero = func_inc_and_test(
targs->margs->type,
atomic_p);
break;
case op_dec_and_test:
is_zero = func_dec_and_test(
targs->margs->type,
atomic_p);
break;
case op_sub_and_test:
is_zero = func_sub_and_test(
targs->margs->type,
targs->margs->value,
atomic_p);
break;
default:
/* designate failure */
is_zero = false;
break;
}
if (is_zero) {
/*
* Only count iterations where the op says the
* value is 0
*/
targs->iterations++;
}
}
/* wait until everyone finishes this iteration */
pthread_barrier_wait(&thread_barrier);
}
return NULL;
}
int test_atomic_arithmetic_and_test_threaded(struct unit_module *m,
struct gk20a *g, void *__args)
{
struct atomic_test_args *args = (struct atomic_test_args *)__args;
struct atomic_struct atomic;
const int num_threads = 100;
/* Start the atomic such that half the threads will potentially see 0 */
const long start_val = 0 -
(ATOMIC_OP_SIGN(args->op) * num_threads / 2) * args->loop_count;
struct atomic_thread_info threads[num_threads];
int i;
unsigned int repeat = args->repeat_count;
int result = UNIT_SUCCESS;
pthread_barrier_init(&thread_barrier, NULL, num_threads + 1);
stop_threads = false;
do {
if (single_set_and_read(m, &atomic, args->type, start_val) !=
UNIT_SUCCESS) {
return UNIT_FAIL;
}
/* setup threads */
for (i = 0; i < num_threads; i++) {
threads[i].iterations = 0;
if (repeat == args->repeat_count) {
threads[i].atomic = &atomic;
threads[i].margs = args;
threads[i].thread_num = i;
pthread_create(&threads[i].thread, NULL,
arithmetic_and_test_updater_thread,
&threads[i]);
}
}
/* start threads */
pthread_barrier_wait(&thread_barrier);
/* wait for all threads to complete */
pthread_barrier_wait(&thread_barrier);
/*
* The threads only count iterations where the test func
* returns true. So, this should only happen once.
*/
if (!correct_thread_iteration_count(m, threads,
num_threads, 1)) {
result = UNIT_FAIL;
break;
}
/*
* Note: The final value isn't verified because we are testing
* the atomicity of the operation and the testing. And the
* non-atomic case may fail the final value before failing the
* test being tested for.
*/
} while (repeat-- > 0);
/* signal the end to the threads, then wake them */
stop_threads = true;
pthread_barrier_wait(&thread_barrier);
/* wait for all threads to exit */
for (i = 0; i < num_threads; i++) {
pthread_join(threads[i].thread, NULL);
}
pthread_barrier_destroy(&thread_barrier);
if (args->type == NOT_ATOMIC) {
/* For the non-atomics, pass is fail and fail is pass */
return INVERTED_RESULT(result);
} else {
return result;
}
}
/*
 * Single-threaded test of the xchg op.
 *
 * Sets the counter to args->start_val and then exchanges in a new value
 * args->loop_count times, checking each time that xchg hands back the value
 * stored by the previous step. The new values alternate in sign and grow
 * with the loop counter.
 *
 * m      - unit test module, used for error reporting
 * g      - not used by this test
 * __args - struct atomic_test_args * describing the case
 *
 * Returns UNIT_SUCCESS or UNIT_FAIL.
 */
int test_atomic_xchg(struct unit_module *m,
		     struct gk20a *g, void *__args)
{
	struct atomic_test_args *args = (struct atomic_test_args *)__args;
	struct atomic_struct atomic = {0};
	unsigned int i;
	long incoming, stored, returned;

	if (single_set_and_read(m, &atomic, args->type, args->start_val)
			!= UNIT_SUCCESS) {
		return UNIT_FAIL;
	}

	stored = args->start_val;
	for (i = 0; i < args->loop_count; i++) {
		/* odd steps use the positive value, even steps the negative */
		incoming = (i % 2 ? 1 : -1) * (args->start_val + i);

		returned = func_xchg(args->type, &atomic, incoming);
		if (returned != stored) {
			unit_return_fail(m, "xchg returned bad old val "
					"Expected: %ld, Received: %ld\n",
					stored, returned);
		}

		stored = incoming;
	}

	return UNIT_SUCCESS;
}
/*
 * Thread function for the test_atomic_xchg_threaded() test.
 *
 * Each round starts and ends with a wait on the shared barrier. After the
 * starting wait the thread exits if stop_threads is set. Otherwise it swaps
 * the value it currently holds (xchg_val) with the shared counter 1000
 * times, keeping whatever each exchange hands back.
 *
 * Always returns NULL.
 */
static void *xchg_thread(void *__args)
{
	struct atomic_thread_info *targs = (struct atomic_thread_info *)__args;
	unsigned int swaps_left;

	for (;;) {
		/* wait here to start iteration */
		pthread_barrier_wait(&thread_barrier);
		if (stop_threads) {
			break;
		}

		for (swaps_left = 1000; swaps_left > 0; swaps_left--) {
			targs->xchg_val = func_xchg(targs->margs->type,
						    targs->atomic,
						    targs->xchg_val);
		}

		/* wait until everyone finishes this iteration */
		pthread_barrier_wait(&thread_barrier);
	}

	return NULL;
}
/*
 * Threaded test of the xchg op.
 *
 * 100 worker threads (xchg_thread()) are created in the first round and
 * reused for every following round. Each round sets the counter to -999,
 * gives worker i the value i, lets the workers exchange values with the
 * counter, and then checks that no two workers ended up holding the same
 * value and that the start value is still either in the counter or held by
 * one of the workers.
 *
 * For the NOT_ATOMIC type the result is inverted, i.e. the test passes only
 * when a round fails those checks.
 *
 * The exit path only releases and joins the workers if they were actually
 * created; otherwise this thread would wait alone on the barrier forever and
 * join thread handles that were never initialised.
 *
 * NOTE(review): pthread_create() results are not checked; a failed create
 * would leave the barrier short of participants.
 *
 * m      - unit test module, used for error reporting
 * g      - not used by this test
 * __args - struct atomic_test_args * describing the case
 *
 * Returns UNIT_SUCCESS or UNIT_FAIL.
 */
int test_atomic_xchg_threaded(struct unit_module *m,
			      struct gk20a *g, void *__args)
{
	struct atomic_test_args *args = (struct atomic_test_args *)__args;
	struct atomic_struct atomic = {0};
	const unsigned int num_threads = 100;
	struct atomic_thread_info threads[num_threads];
	unsigned int i;
	unsigned int repeat = args->repeat_count;
	int result = UNIT_SUCCESS;
	const long start_val = -999;
	bool start_val_present;
	bool threads_started = false;

	/* workers plus this thread meet at the barrier */
	pthread_barrier_init(&thread_barrier, NULL, num_threads + 1);
	stop_threads = false;

	do {
		/* start at -999 */
		if (single_set_and_read(m, &atomic, args->type, start_val) !=
				UNIT_SUCCESS) {
			result = UNIT_FAIL;
			goto exit;
		}

		/* setup threads; they are only created in the first round */
		for (i = 0; i < num_threads; i++) {
			threads[i].iterations = 0;
			threads[i].xchg_val = i;
			if (!threads_started) {
				threads[i].atomic = &atomic;
				threads[i].margs = args;
				threads[i].thread_num = i;
				pthread_create(&threads[i].thread, NULL,
					       xchg_thread, &threads[i]);
			}
		}
		threads_started = true;

		/* start threads */
		pthread_barrier_wait(&thread_barrier);
		/* wait for all threads to complete */
		pthread_barrier_wait(&thread_barrier);

		start_val_present = false;
		for (i = 0; i < num_threads; i++) {
			unsigned int j;

			if (threads[i].xchg_val == start_val) {
				start_val_present = true;
			}
			/* every value must be held by at most one thread */
			for (j = (i + 1); j < num_threads; j++) {
				if (threads[i].xchg_val ==
						threads[j].xchg_val) {
					unit_err(m, "duplicate value\n");
					result = UNIT_FAIL;
					goto exit;
				}
			}
		}

		/* the start value must be in the counter or with a thread */
		if ((func_read(args->type, &atomic) != start_val) &&
		    !start_val_present) {
			unit_err(m, "start value no present\n");
			result = UNIT_FAIL;
			goto exit;
		}
	} while (repeat-- > 0);

exit:
	if (threads_started) {
		/* signal the end to the threads, then wake them */
		stop_threads = true;
		pthread_barrier_wait(&thread_barrier);
		/* wait for all threads to exit */
		for (i = 0; i < num_threads; i++) {
			pthread_join(threads[i].thread, NULL);
		}
	}
	pthread_barrier_destroy(&thread_barrier);

	if (args->type == NOT_ATOMIC) {
		/* For the non-atomics, pass is fail and fail is pass */
		return INVERTED_RESULT(result);
	} else {
		return result;
	}
}
/*
 * Single-threaded test of the cmpxchg op.
 *
 * Sets the counter to args->start_val and then calls cmpxchg
 * args->loop_count times. Every 5 iterations the test flips between
 * comparing against the value actually stored (the exchange must happen)
 * and comparing against its negation (the exchange must not happen). In
 * both modes cmpxchg must return the value that was stored before the call.
 *
 * m      - unit test module, used for error reporting
 * g      - not used by this test
 * __args - struct atomic_test_args * describing the case
 *
 * Returns UNIT_SUCCESS or UNIT_FAIL.
 */
int test_atomic_cmpxchg(struct unit_module *m,
			struct gk20a *g, void *__args)
{
	struct atomic_test_args *args = (struct atomic_test_args *)__args;
	struct atomic_struct atomic = {0};
	const int switch_interval = 5;
	unsigned int i;
	long candidate, stored, seen;
	bool should_match = true;

	if (single_set_and_read(m, &atomic, args->type, args->start_val)
			!= UNIT_SUCCESS) {
		return UNIT_FAIL;
	}

	stored = args->start_val;
	for (i = 0; i < args->loop_count; i++) {
		/* flip the mode every switch_interval iterations */
		if ((i % switch_interval) == 0) {
			should_match = !should_match;
		}
		candidate = args->start_val + i;

		if (!should_match) {
			/* compare against a wrong value: nothing may change */
			seen = func_cmpxchg(args->type, &atomic,
					    -1 * stored, candidate);
			if (seen != stored) {
				unit_return_fail(m,
					"cmpxchg returned bad old val "
					"Expected: %ld, Received: %ld\n",
					stored, seen);
			}
			seen = func_read(args->type, &atomic);
			if (seen != stored) {
				unit_return_fail(m,
					"cmpxchg should not have updated "
					"Expected: %ld, Received: %ld\n",
					stored, seen);
			}
			continue;
		}

		/* compare against the stored value: the swap must happen */
		seen = func_cmpxchg(args->type, &atomic, stored, candidate);
		if (seen != stored) {
			unit_return_fail(m,
				"cmpxchg returned bad old val "
				"Expected: %ld, Received: %ld\n",
				stored, seen);
		}
		seen = func_read(args->type, &atomic);
		if (seen != candidate) {
			unit_return_fail(m,
				"cmpxchg did not update "
				"Expected: %ld, Received: %ld\n",
				candidate, seen);
		}
		stored = candidate;
	}

	return UNIT_SUCCESS;
}
/*
 * Single-threaded test of the add_unless op.
 *
 * Sets the counter to args->start_val and then calls add_unless
 * args->loop_count times. Every 5 iterations the test flips between passing
 * an "unless" value that differs from the counter (the add must happen) and
 * passing the counter's own value (the add must not happen). In both modes
 * add_unless must return the value the counter held before the call.
 *
 * Values are tracked as long, matching func_add_unless()/func_read(), so
 * 64-bit counters are not truncated.
 *
 * m      - unit test module, used for error reporting
 * g      - not used by this test
 * __args - struct atomic_test_args * describing the case
 *
 * Returns UNIT_SUCCESS or UNIT_FAIL.
 */
int test_atomic_add_unless(struct unit_module *m,
			   struct gk20a *g, void *__args)
{
	struct atomic_test_args *args = (struct atomic_test_args *)__args;
	struct atomic_struct atomic = {0};
	const int switch_interval = 5;
	unsigned int i;
	long new_val, old_val, ret_val;
	bool should_update = true;

	if (single_set_and_read(m, &atomic, args->type, args->start_val)
			!= UNIT_SUCCESS) {
		return UNIT_FAIL;
	}

	old_val = args->start_val;
	for (i = 0; i < args->loop_count; i++) {
		/* alternate whether add should occur every switch_interval */
		if ((i % switch_interval) == 0) {
			should_update = !should_update;
		}

		if (should_update) {
			/* This will fail to match and do the add */
			ret_val = func_add_unless(args->type, &atomic,
						  args->value, old_val - 1);
			if (ret_val != old_val) {
				unit_return_fail(m,
					"add_unless returned bad old val "
					"Expected: %ld, Received: %ld\n",
					old_val, ret_val);
			}
			new_val = old_val + args->value;
			ret_val = func_read(args->type, &atomic);
			if (ret_val != new_val) {
				unit_return_fail(m, "add_unless did not "
						"update Expected: %ld, "
						"Received: %ld\n",
						new_val, ret_val);
			}
			old_val = ret_val;
		} else {
			/* This will match the old value and won't add */
			ret_val = func_add_unless(args->type, &atomic,
						  args->value, old_val);
			if (ret_val != old_val) {
				unit_return_fail(m,
					"add_unless returned bad old val "
					"Expected: %ld, Received: %ld\n",
					old_val, ret_val);
			}
			ret_val = func_read(args->type, &atomic);
			if (ret_val != old_val) {
				unit_return_fail(m, "add_unless should not "
						"have updated Expected: %ld, "
						"Received: %ld\n",
						old_val, ret_val);
			}
		}
	}

	return UNIT_SUCCESS;
}
/*
 * Test argument tables. Fields that are not listed in an initializer are
 * zero.
 */
/* Set-and-read cases; only the type is needed. */
static struct atomic_test_args set_and_read_32_arg = {
.type = ATOMIC_32,
};
static struct atomic_test_args set_and_read_64_arg = {
.type = ATOMIC_64,
};
/* 32-bit increment case starting below zero. */
static struct atomic_test_args inc_32_arg = {
.op = op_inc,
.type = ATOMIC_32,
.start_val = -500,
.loop_count = 10000,
.value = 1,
};
/* inc_and_test cases: start at -500 so the count passes through 0. */
static struct atomic_test_args inc_and_test_32_arg = {
/* must cross 0 */
.op = op_inc_and_test,
.type = ATOMIC_32,
.start_val = -500,
.loop_count = 10000,
.value = 1,
};
static struct atomic_test_args inc_and_test_64_arg = {
/* must cross 0 */
.op = op_inc_and_test,
.type = ATOMIC_64,
.start_val = -500,
.loop_count = 10000,
.value = 1,
};
/*
 * Threaded inc_and_test cases; presumably used with
 * test_atomic_arithmetic_and_test_threaded() (registration not visible
 * here), which computes its own start value.
 */
static struct atomic_test_args inc_and_test_not_atomic_threaded_arg = {
/* must cross 0 */
.op = op_inc_and_test,
.type = NOT_ATOMIC,
.loop_count = 100,
.repeat_count = 5000, /* for threaded test */
};
static struct atomic_test_args inc_and_test_32_threaded_arg = {
/* must cross 0 */
.op = op_inc_and_test,
.type = ATOMIC_32,
.loop_count = 100,
.repeat_count = 5000, /* for threaded test */
};
static struct atomic_test_args inc_and_test_64_threaded_arg = {
/* must cross 0 */
.op = op_inc_and_test,
.type = ATOMIC_32,
.loop_count = 100,
.repeat_count = 5000, /* for threaded test */
};
/*
 * 64-bit increment case: starts just below INT_MAX, so with 10000 iterations
 * the value moves past the 32-bit range.
 */
static struct atomic_test_args inc_64_arg = {
.op = op_inc,
.type = ATOMIC_64,
.start_val = INT_MAX - 500,
.loop_count = 10000,
.value = 1,
};
/* 32-bit decrement case starting above zero. */
static struct atomic_test_args dec_32_arg = {
.op = op_dec,
.type = ATOMIC_32,
.start_val = 500,
.loop_count = 10000,
.value = 1,
};
/* dec_and_test cases: start at 500 so the count passes through 0. */
static struct atomic_test_args dec_and_test_32_arg = {
/* must cross 0 */
.op = op_dec_and_test,
.type = ATOMIC_32,
.start_val = 500,
.loop_count = 10000,
.value = 1,
};
static struct atomic_test_args dec_and_test_64_arg = {
/* must cross 0 */
.op = op_dec_and_test,
.type = ATOMIC_64,
.start_val = 500,
.loop_count = 10000,
.value = 1,
};
/*
 * Threaded dec_and_test cases; presumably used with
 * test_atomic_arithmetic_and_test_threaded() (registration not visible
 * here), which computes its own start value.
 */
static struct atomic_test_args dec_and_test_not_atomic_threaded_arg = {
/* must cross 0 */
.op = op_dec_and_test,
.type = NOT_ATOMIC,
.loop_count = 100,
.repeat_count = 5000, /* for threaded test */
};
static struct atomic_test_args dec_and_test_32_threaded_arg = {
/* must cross 0 */
.op = op_dec_and_test,
.type = ATOMIC_32,
.loop_count = 100,
.repeat_count = 5000, /* for threaded test */
};
static struct atomic_test_args dec_and_test_64_threaded_arg = {
/* must cross 0 */
.op = op_dec_and_test,
.type = ATOMIC_32,
.loop_count = 100,
.repeat_count = 5000, /* for threaded test */
};
/*
 * 64-bit decrement case: starts just above INT_MIN, so with 10000 iterations
 * the value moves below the 32-bit range.
 */
static struct atomic_test_args dec_64_arg = {
.op = op_dec,
.type = ATOMIC_64,
.start_val = INT_MIN + 500,
.loop_count = 10000,
.value = 1,
};
/* Add cases: add 7 per op. */
static struct atomic_test_args add_32_arg = {
.op = op_add,
.type = ATOMIC_32,
.start_val = -500,
.loop_count = 10000,
.value = 7,
};
static struct atomic_test_args add_64_arg = {
.op = op_add,
.type = ATOMIC_64,
.start_val = INT_MAX - 500,
.loop_count = 10000,
.value = 7,
};
/*
 * Sub cases: subtract 7 per op.
 * NOTE(review): sub_32_arg is the only arithmetic argument table here
 * without "static"; confirm whether external linkage is intended.
 */
struct atomic_test_args sub_32_arg = {
.op = op_sub,
.type = ATOMIC_32,
.start_val = 500,
.loop_count = 10000,
.value = 7,
};
static struct atomic_test_args sub_64_arg = {
.op = op_sub,
.type = ATOMIC_64,
.start_val = INT_MIN + 500,
.loop_count = 10000,
.value = 7,
};
/*
 * sub_and_test cases: 500 is a multiple of the step 5, so the count lands
 * exactly on 0 on the way down.
 */
static struct atomic_test_args sub_and_test_32_arg = {
/* must cross 0 */
.op = op_sub_and_test,
.type = ATOMIC_32,
.start_val = 500,
.loop_count = 10000,
.value = 5,
};
static struct atomic_test_args sub_and_test_64_arg = {
/* must cross 0 */
.op = op_sub_and_test,
.type = ATOMIC_64,
.start_val = 500,
.loop_count = 10000,
.value = 5,
};
/*
 * Threaded sub_and_test cases; presumably used with
 * test_atomic_arithmetic_and_test_threaded() (registration not visible
 * here), which computes its own start value.
 */
static struct atomic_test_args sub_and_test_not_atomic_threaded_arg = {
/* must cross 0 */
.op = op_sub_and_test,
.type = NOT_ATOMIC,
.loop_count = 100,
.value = 5,
.repeat_count = 5000, /* for threaded test */
};
static struct atomic_test_args sub_and_test_32_threaded_arg = {
/* must cross 0 */
.op = op_sub_and_test,
.type = ATOMIC_32,
.loop_count = 100,
.value = 5,
.repeat_count = 5000, /* for threaded test */
};
static struct atomic_test_args sub_and_test_64_threaded_arg = {
/* must cross 0 */
.op = op_sub_and_test,
.type = ATOMIC_32,
.loop_count = 100,
.value = 5,
.repeat_count = 5000, /* for threaded test */
};
struct atomic_test_args xchg_not_atomic_arg = {
.op = op_cmpxchg,
.type = NOT_ATOMIC,
.start_val = 1,
.value = 1,
.loop_count = 10000,
.repeat_count = 5000, /* for threaded test */
};
struct atomic_test_args xchg_32_arg = {
.op = op_cmpxchg,
.type = ATOMIC_32,
.start_val = 1,
.value = 1,
.loop_count = 10000,
.repeat_count = 5000, /* for threaded test */
};
/*
 * Parameters for the 64-bit xchg entries in the test table, including
 * atomic_xchg_64 and atomic_xchg_64_threaded. The start value is INT_MAX.
 *
 * NOTE(review): .op is op_cmpxchg; enum atomic_op in this file has no
 * dedicated xchg value, so the xchg tests presumably do not look at .op -
 * confirm against test_atomic_xchg.
 */
struct atomic_test_args xchg_64_arg = {
	.type = ATOMIC_64,
	.op = op_cmpxchg,
	.start_val = INT_MAX,
	.loop_count = 10000UL,
	.value = 1UL,
	.repeat_count = 5000U, /* for threaded test */
};
/*
 * Parameters for the threaded cmpxchg entry that runs on the plain
 * (NOT_ATOMIC) counter, atomic_cmpxchg_not_atomic_threaded.
 */
struct atomic_test_args cmpxchg_not_atomic_arg = {
	.type = NOT_ATOMIC,
	.op = op_cmpxchg,
	.start_val = 1L,
	.loop_count = 10000UL,
	.value = 1UL,
	.repeat_count = 50000U, /* for threaded test */
};
/*
 * Parameters for 32-bit cmpxchg testing; referenced by the
 * atomic_cmpxchg_32_threaded entry in the test table.
 */
struct atomic_test_args cmpxchg_32_arg = {
	.type = ATOMIC_32,
	.op = op_cmpxchg,
	.start_val = 1L,
	.loop_count = 10000UL,
	.value = 1UL,
	.repeat_count = 50000U, /* for threaded test */
};
/*
 * Parameters for 64-bit cmpxchg testing; referenced by the
 * atomic_cmpxchg_64_threaded entry in the test table. The start value is
 * INT_MAX.
 */
struct atomic_test_args cmpxchg_64_arg = {
	.type = ATOMIC_64,
	.op = op_cmpxchg,
	.start_val = INT_MAX,
	.loop_count = 10000UL,
	.value = 1UL,
	.repeat_count = 50000U, /* for threaded test */
};
/*
 * Parameters for the 32-bit add_unless entries in the test table
 * (atomic_add_unless_32 and atomic_add_unless_32_threaded).
 *
 * The test must loop at least 10 times.
 */
static struct atomic_test_args add_unless_32_arg = {
	.type = ATOMIC_32,
	.op = op_add_unless,
	.value = 5UL,
	.loop_count = 10000UL,
	.start_val = -500L,
};
/*
 * Parameters for the 64-bit add_unless entries in the test table
 * (atomic_add_unless_64 and atomic_add_unless_64_threaded).
 *
 * The test must loop at least 10 times.
 */
static struct atomic_test_args add_unless_64_arg = {
	.type = ATOMIC_64,
	.op = op_add_unless,
	.value = 5UL,
	.loop_count = 10000UL,
	.start_val = -500L,
};
/*
 * Table of all atomic unit tests.
 *
 * Each UNIT_TEST entry lists the test name, the test function, a pointer to
 * the struct atomic_test_args block handed to that function, and a final
 * 0/1 that matches the level named in the section comment above it.
 */
struct unit_module_test atomic_tests[] = {
	/* Level 0 tests */
	UNIT_TEST(atomic_set_and_read_32, test_atomic_set_and_read, &set_and_read_32_arg, 0),
	UNIT_TEST(atomic_set_and_read_64, test_atomic_set_and_read, &set_and_read_64_arg, 0),
	UNIT_TEST(atomic_inc_32, test_atomic_arithmetic, &inc_32_arg, 0),
	UNIT_TEST(atomic_inc_and_test_32, test_atomic_arithmetic, &inc_and_test_32_arg, 0),
	UNIT_TEST(atomic_inc_and_test_64, test_atomic_arithmetic, &inc_and_test_64_arg, 0),
	UNIT_TEST(atomic_inc_64, test_atomic_arithmetic, &inc_64_arg, 0),
	UNIT_TEST(atomic_dec_32, test_atomic_arithmetic, &dec_32_arg, 0),
	UNIT_TEST(atomic_dec_64, test_atomic_arithmetic, &dec_64_arg, 0),
	UNIT_TEST(atomic_dec_and_test_32, test_atomic_arithmetic, &dec_and_test_32_arg, 0),
	UNIT_TEST(atomic_dec_and_test_64, test_atomic_arithmetic, &dec_and_test_64_arg, 0),
	UNIT_TEST(atomic_add_32, test_atomic_arithmetic, &add_32_arg, 0),
	UNIT_TEST(atomic_add_64, test_atomic_arithmetic, &add_64_arg, 0),
	UNIT_TEST(atomic_sub_32, test_atomic_arithmetic, &sub_32_arg, 0),
	UNIT_TEST(atomic_sub_64, test_atomic_arithmetic, &sub_64_arg, 0),
	UNIT_TEST(atomic_sub_and_test_32, test_atomic_arithmetic, &sub_and_test_32_arg, 0),
	UNIT_TEST(atomic_sub_and_test_64, test_atomic_arithmetic, &sub_and_test_64_arg, 0),
	UNIT_TEST(atomic_xchg_32, test_atomic_xchg, &xchg_32_arg, 0),
	UNIT_TEST(atomic_xchg_64, test_atomic_xchg, &xchg_64_arg, 0),
	/*
	 * The cmpxchg tests take the dedicated cmpxchg_*_arg blocks, which
	 * differ from xchg_*_arg only in repeat_count.
	 */
	UNIT_TEST(atomic_cmpxchg_32, test_atomic_cmpxchg, &cmpxchg_32_arg, 0),
	UNIT_TEST(atomic_cmpxchg_64, test_atomic_cmpxchg, &cmpxchg_64_arg, 0),
	UNIT_TEST(atomic_add_unless_32, test_atomic_add_unless, &add_unless_32_arg, 0),
	UNIT_TEST(atomic_add_unless_64, test_atomic_add_unless, &add_unless_64_arg, 0),
	UNIT_TEST(atomic_inc_32_threaded, test_atomic_arithmetic_threaded, &inc_32_arg, 0),
	UNIT_TEST(atomic_inc_64_threaded, test_atomic_arithmetic_threaded, &inc_64_arg, 0),
	UNIT_TEST(atomic_dec_32_threaded, test_atomic_arithmetic_threaded, &dec_32_arg, 0),
	UNIT_TEST(atomic_dec_64_threaded, test_atomic_arithmetic_threaded, &dec_64_arg, 0),
	UNIT_TEST(atomic_add_32_threaded, test_atomic_arithmetic_threaded, &add_32_arg, 0),
	UNIT_TEST(atomic_add_64_threaded, test_atomic_arithmetic_threaded, &add_64_arg, 0),
	UNIT_TEST(atomic_sub_32_threaded, test_atomic_arithmetic_threaded, &sub_32_arg, 0),
	UNIT_TEST(atomic_sub_64_threaded, test_atomic_arithmetic_threaded, &sub_64_arg, 0),
	UNIT_TEST(atomic_cmpxchg_not_atomic_threaded, test_atomic_arithmetic_threaded, &cmpxchg_not_atomic_arg, 0),
	UNIT_TEST(atomic_cmpxchg_32_threaded, test_atomic_arithmetic_threaded, &cmpxchg_32_arg, 0),
	UNIT_TEST(atomic_cmpxchg_64_threaded, test_atomic_arithmetic_threaded, &cmpxchg_64_arg, 0),
	/* Level 1 tests */
	UNIT_TEST(atomic_inc_and_test_not_atomic_threaded, test_atomic_arithmetic_and_test_threaded, &inc_and_test_not_atomic_threaded_arg, 1),
	UNIT_TEST(atomic_inc_and_test_32_threaded, test_atomic_arithmetic_and_test_threaded, &inc_and_test_32_threaded_arg, 1),
	UNIT_TEST(atomic_inc_and_test_64_threaded, test_atomic_arithmetic_and_test_threaded, &inc_and_test_64_threaded_arg, 1),
	UNIT_TEST(atomic_dec_and_test_not_atomic_threaded, test_atomic_arithmetic_and_test_threaded, &dec_and_test_not_atomic_threaded_arg, 1),
	UNIT_TEST(atomic_dec_and_test_32_threaded, test_atomic_arithmetic_and_test_threaded, &dec_and_test_32_threaded_arg, 1),
	UNIT_TEST(atomic_dec_and_test_64_threaded, test_atomic_arithmetic_and_test_threaded, &dec_and_test_64_threaded_arg, 1),
	UNIT_TEST(atomic_sub_and_test_not_atomic_threaded, test_atomic_arithmetic_and_test_threaded, &sub_and_test_not_atomic_threaded_arg, 1),
	UNIT_TEST(atomic_sub_and_test_32_threaded, test_atomic_arithmetic_and_test_threaded, &sub_and_test_32_threaded_arg, 1),
	UNIT_TEST(atomic_sub_and_test_64_threaded, test_atomic_arithmetic_and_test_threaded, &sub_and_test_64_threaded_arg, 1),
	UNIT_TEST(atomic_add_unless_32_threaded, test_atomic_arithmetic_threaded, &add_unless_32_arg, 1),
	UNIT_TEST(atomic_add_unless_64_threaded, test_atomic_arithmetic_threaded, &add_unless_64_arg, 1),
	UNIT_TEST(atomic_xchg_not_atomic_threaded, test_atomic_xchg_threaded, &xchg_not_atomic_arg, 1),
	UNIT_TEST(atomic_xchg_32_threaded, test_atomic_xchg_threaded, &xchg_32_arg, 1),
	UNIT_TEST(atomic_xchg_64_threaded, test_atomic_xchg_threaded, &xchg_64_arg, 1),
};
/*
 * Pass the atomic_tests table to the unit test framework under the module
 * name "atomic". NOTE(review): UNIT_PRIO_POSIX_TEST presumably selects this
 * module's priority relative to other unit modules - confirm in unit/unit.h.
 */
UNIT_MODULE(atomic, atomic_tests, UNIT_PRIO_POSIX_TEST);