linux-nvgpu/userspace/units/interface/atomic/atomic.c
Philip Elcan 82ce44d882 gpu: nvgpu: unit: atomics: add cmpxchg threaded test
Add a threaded test for the nvgpu_cmpxchg API. The test implements an
atomic increment using cmpxchg and reuses the existing threaded
arithmetic framework, calling the cmpxchg_inc() helper to verify that
100 parallel threads increment the atomic the correct number of times.

These are L1 tests since they have longer run times and are unlikely to
regress.

JIRA NVGPU-2251

Change-Id: I9c2b68052b3a1b6ef20adfa24e7d50746902f754
Signed-off-by: Philip Elcan <pelcan@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/2100748
Reviewed-by: svc-mobile-coverity <svc-mobile-coverity@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Alex Waterman <alexw@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2019-04-25 15:14:17 -07:00

1507 lines
40 KiB
C

/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#include <stdlib.h> /* for labs() */
#include <unit/unit.h>
#include <unit/io.h>
#include <unit/unit-requirement-ids.h>
#include <nvgpu/atomic.h>
#include <nvgpu/bug.h>
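/*
 * Holds one instance of each value type under test. Each test touches
 * only the member selected by its atomic_type.
 */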
struct atomic_struct {
long not_atomic;
nvgpu_atomic_t atomic;
nvgpu_atomic64_t atomic64;
};
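/*
 * Selects which atomic_struct member a test exercises. NOT_ATOMIC uses
 * the plain long as a negative control: racy threaded tests on it are
 * expected to fail, and their result is inverted.
 */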
enum atomic_type {
ATOMIC_32,
ATOMIC_64,
NOT_ATOMIC,
};
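/* The operation a parameterized test should exercise. */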
enum atomic_op {
op_inc,
op_dec,
op_add,
op_sub,
op_inc_and_test,
op_dec_and_test,
op_sub_and_test,
op_add_unless,
op_cmpxchg,
};
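/* Per-test parameters, passed to each test through its __args pointer. */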
struct atomic_test_args {
enum atomic_op op;
enum atomic_type type;
long start_val;
unsigned long loop_count;
unsigned long value; /* absolute value */
unsigned int repeat_count; /* How many times to repeat a test.
* Only applies to threaded tests.
*/
};
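/*
 * Per-thread state for the threaded tests: the shared atomic under test,
 * the test args, and per-thread bookkeeping verified after the join.
 */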
struct atomic_thread_info {
struct atomic_struct *atomic;
struct atomic_test_args *margs;
pthread_t thread;
unsigned int thread_num;
unsigned int iterations;
long final_val;
long final_expected_val;
long xchg_val;
long unless;
};
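/*
 * Barrier used to line the worker threads up so their operations overlap;
 * stop_threads tells persistent workers to exit.
 */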
static pthread_barrier_t thread_barrier;
static bool stop_threads;
/*
* Define functions for atomic ops that handle all types so we can
* keep the code cleaner.
*/
static inline void func_set(enum atomic_type type, struct atomic_struct *ref,
long val)
{
switch (type) {
case NOT_ATOMIC:
ref->not_atomic = val;
break;
case ATOMIC_32:
nvgpu_atomic_set(&(ref->atomic), val);
break;
case ATOMIC_64:
nvgpu_atomic64_set(&(ref->atomic64), val);
break;
}
}
static inline long func_read(enum atomic_type type, struct atomic_struct *ref)
{
long ret = 0;
switch (type) {
case NOT_ATOMIC:
ret = ref->not_atomic;
break;
case ATOMIC_32:
ret = nvgpu_atomic_read(&(ref->atomic));
break;
case ATOMIC_64:
ret = nvgpu_atomic64_read(&(ref->atomic64));
break;
}
return ret;
}
static inline void func_inc(enum atomic_type type, struct atomic_struct *ref)
{
switch (type) {
case NOT_ATOMIC:
++ref->not_atomic;
break;
case ATOMIC_32:
nvgpu_atomic_inc(&(ref->atomic));
break;
case ATOMIC_64:
nvgpu_atomic64_inc(&(ref->atomic64));
break;
}
}
static inline long func_inc_return(enum atomic_type type,
struct atomic_struct *ref)
{
long ret = 0;
switch (type) {
case NOT_ATOMIC:
++ref->not_atomic;
ret = ref->not_atomic;
break;
case ATOMIC_32:
ret = nvgpu_atomic_inc_return(&(ref->atomic));
break;
case ATOMIC_64:
ret = nvgpu_atomic64_inc_return(&(ref->atomic64));
break;
}
return ret;
}
static inline bool func_inc_and_test(enum atomic_type type,
struct atomic_struct *ref)
{
bool ret = false;
switch (type) {
case NOT_ATOMIC:
++ref->not_atomic;
ret = (ref->not_atomic == 0);
break;
case ATOMIC_32:
ret = nvgpu_atomic_inc_and_test(&(ref->atomic));
break;
case ATOMIC_64:
ret = nvgpu_atomic64_inc_and_test(&(ref->atomic64));
break;
}
return ret;
}
static inline void func_dec(enum atomic_type type, struct atomic_struct *ref)
{
switch (type) {
case NOT_ATOMIC:
--ref->not_atomic;
break;
case ATOMIC_32:
nvgpu_atomic_dec(&(ref->atomic));
break;
case ATOMIC_64:
nvgpu_atomic64_dec(&(ref->atomic64));
break;
}
}
static inline long func_dec_return(enum atomic_type type,
struct atomic_struct *ref)
{
long ret = 0;
switch (type) {
case NOT_ATOMIC:
--ref->not_atomic;
ret = ref->not_atomic;
break;
case ATOMIC_32:
ret = nvgpu_atomic_dec_return(&(ref->atomic));
break;
case ATOMIC_64:
ret = nvgpu_atomic64_dec_return(&(ref->atomic64));
break;
}
return ret;
}
static inline bool func_dec_and_test(enum atomic_type type,
struct atomic_struct *ref)
{
bool ret = false;
switch (type) {
case NOT_ATOMIC:
--ref->not_atomic;
ret = (ref->not_atomic == 0);
break;
case ATOMIC_32:
ret = nvgpu_atomic_dec_and_test(&(ref->atomic));
break;
case ATOMIC_64:
ret = nvgpu_atomic64_dec_and_test(&(ref->atomic64));
break;
}
return ret;
}
static inline void func_add(enum atomic_type type, long val,
struct atomic_struct *ref)
{
switch (type) {
case NOT_ATOMIC:
ref->not_atomic += val;
break;
case ATOMIC_32:
nvgpu_atomic_add(val, &(ref->atomic));
break;
case ATOMIC_64:
nvgpu_atomic64_add(val, &(ref->atomic64));
break;
}
}
static inline long func_add_return(enum atomic_type type, long val,
struct atomic_struct *ref)
{
long ret = 0;
switch (type) {
case NOT_ATOMIC:
ref->not_atomic += val;
ret = ref->not_atomic;
break;
case ATOMIC_32:
ret = nvgpu_atomic_add_return(val, &(ref->atomic));
break;
case ATOMIC_64:
ret = nvgpu_atomic64_add_return(val, &(ref->atomic64));
break;
}
return ret;
}
static inline long func_add_unless(enum atomic_type type,
struct atomic_struct *ref, long val,
long unless)
{
long ret = 0;
switch (type) {
case NOT_ATOMIC:
ret = ref->not_atomic;
if (ret != unless) {
ref->not_atomic += val;
}
break;
case ATOMIC_32:
ret = nvgpu_atomic_add_unless(&(ref->atomic), val,
unless);
break;
case ATOMIC_64:
ret = nvgpu_atomic64_add_unless(&(ref->atomic64), val,
unless);
break;
}
return ret;
}
static inline void func_sub(enum atomic_type type, long val,
struct atomic_struct *ref)
{
switch (type) {
case NOT_ATOMIC:
ref->not_atomic -= val;
break;
case ATOMIC_32:
nvgpu_atomic_sub(val, &(ref->atomic));
break;
case ATOMIC_64:
nvgpu_atomic64_sub(val, &(ref->atomic64));
break;
}
}
static inline long func_sub_return(enum atomic_type type, long val,
struct atomic_struct *ref)
{
long ret = 0;
switch (type) {
case NOT_ATOMIC:
ref->not_atomic -= val;
ret = ref->not_atomic;
break;
case ATOMIC_32:
ret = nvgpu_atomic_sub_return(val, &(ref->atomic));
break;
case ATOMIC_64:
ret = nvgpu_atomic64_sub_return(val, &(ref->atomic64));
break;
}
return ret;
}
static inline bool func_sub_and_test(enum atomic_type type, long val,
struct atomic_struct *ref)
{
bool ret = false;
switch (type) {
case NOT_ATOMIC:
ref->not_atomic -= val;
ret = (ref->not_atomic == 0);
break;
case ATOMIC_32:
ret = nvgpu_atomic_sub_and_test(val, &(ref->atomic));
break;
case ATOMIC_64:
ret = nvgpu_atomic64_sub_and_test(val,
&(ref->atomic64));
break;
}
return ret;
}
static inline long func_xchg(enum atomic_type type, struct atomic_struct *ref,
long new)
{
long ret = 0;
switch (type) {
case NOT_ATOMIC:
ret = ref->not_atomic;
ref->not_atomic = new;
break;
case ATOMIC_32:
ret = nvgpu_atomic_xchg(&(ref->atomic), new);
break;
case ATOMIC_64:
ret = nvgpu_atomic64_xchg(&(ref->atomic64), new);
break;
}
return ret;
}
static inline long func_cmpxchg(enum atomic_type type,
struct atomic_struct *ref, long old, long new)
{
long ret = 0;
switch (type) {
case NOT_ATOMIC:
ret = ref->not_atomic;
if (ret == old) {
ref->not_atomic = new;
}
break;
case ATOMIC_32:
ret = nvgpu_atomic_cmpxchg(&(ref->atomic), old, new);
break;
case ATOMIC_64:
ret = nvgpu_atomic64_cmpxchg(&(ref->atomic64), old,
new);
break;
}
return ret;
}
/*
* Helper macro that takes an atomic op from the enum and returns +1/-1
* to help with arithmetic.
*/
#define ATOMIC_OP_SIGN(atomic_op) \
({ \
long sign; \
switch (atomic_op) { \
case op_dec: \
case op_sub: \
case op_dec_and_test: \
case op_sub_and_test: \
sign = -1; \
break; \
default: \
sign = 1; \
} \
sign; \
})
/* For the non-atomic case, we usually have to invert success/failure */
#define INVERTED_RESULT(result) \
(((result) == UNIT_FAIL) ? UNIT_SUCCESS : UNIT_FAIL)
/* Support function to do an atomic set and read verification */
static int single_set_and_read(struct unit_module *m,
struct atomic_struct *atomic,
enum atomic_type type, const long set_val)
{
long read_val;
if ((type == ATOMIC_32) &&
((set_val < INT_MIN) || (set_val > INT_MAX))) {
unit_return_fail(m, "Invalid value for 32 op\n");
}
func_set(type, atomic, set_val);
read_val = func_read(type, atomic);
if (read_val != set_val) {
unit_err(m, "Atomic returned wrong value. Expected: %ld "
"Received: %ld\n", (long)set_val, (long)read_val);
return UNIT_FAIL;
}
return UNIT_SUCCESS;
}
/*
* Test atomic set and read operations single-threaded for proper functionality
*
* Tests setting the limit values for each size.
* Loops through setting each bit in a 32/64-bit value.
*/
static int test_atomic_set_and_read(struct unit_module *m,
struct gk20a *g, void *__args)
{
struct atomic_test_args *args = (struct atomic_test_args *)__args;
const unsigned int loop_limit = args->type == ATOMIC_32 ?
(sizeof(int) * 8) : (sizeof(long) * 8);
const long min_value = args->type == ATOMIC_32 ? INT_MIN :
LONG_MIN;
const long max_value = args->type == ATOMIC_32 ? INT_MAX :
LONG_MAX;
struct atomic_struct atomic = {0};
unsigned int i;
long bit_val;
if ((single_set_and_read(m, &atomic, args->type, min_value)
!= UNIT_SUCCESS) ||
(single_set_and_read(m, &atomic, args->type, max_value)
!= UNIT_SUCCESS) ||
(single_set_and_read(m, &atomic, args->type, 0)
!= UNIT_SUCCESS)) {
return UNIT_FAIL;
}
for (i = 0; i < loop_limit; i++) {
/* shift in an unsigned type so setting the sign bit stays defined */
bit_val = (args->type == ATOMIC_32) ?
(long)(int)(1U << i) : (long)(1UL << i);
if (single_set_and_read(m, &atomic, args->type, bit_val)
!= UNIT_SUCCESS) {
return UNIT_FAIL;
}
}
return UNIT_SUCCESS;
}
/*
* Test arithmetic atomic operations single-threaded for proper functionality:
* inc, dec, add, sub, and friends (except add_unless)
* Sets a start value from args
* Loops (iterations per args param)
* Validates final result
*
* For *_and_test ops, the args should make sure the loop traverses across 0
* to test the "test" part.
*/
static int test_atomic_arithmetic(struct unit_module *m,
struct gk20a *g, void *__args)
{
struct atomic_test_args *args = (struct atomic_test_args *)__args;
struct atomic_struct atomic = {0};
unsigned int i;
long delta_magnitude;
long read_val;
long expected_val;
bool result_bool;
bool check_result_bool = false;
if (single_set_and_read(m, &atomic, args->type, args->start_val)
!= UNIT_SUCCESS) {
return UNIT_FAIL;
}
for (i = 1; i <= args->loop_count; i++) {
if (args->op == op_inc) {
/* use 2 since we test both inc and inc_return */
delta_magnitude = 2;
func_inc(args->type, &atomic);
read_val = func_inc_return(args->type, &atomic);
} else if (args->op == op_inc_and_test) {
delta_magnitude = 1;
check_result_bool = true;
result_bool = func_inc_and_test(args->type, &atomic);
read_val = func_read(args->type, &atomic);
} else if (args->op == op_dec) {
/* use 2 since we test both dec and dec_return */
delta_magnitude = 2;
func_dec(args->type, &atomic);
read_val = func_dec_return(args->type, &atomic);
} else if (args->op == op_dec_and_test) {
delta_magnitude = 1;
check_result_bool = true;
result_bool = func_dec_and_test(args->type, &atomic);
read_val = func_read(args->type, &atomic);
} else if (args->op == op_add) {
delta_magnitude = args->value * 2;
func_add(args->type, args->value, &atomic);
read_val = func_add_return(args->type, args->value,
&atomic);
} else if (args->op == op_sub) {
delta_magnitude = args->value * 2;
func_sub(args->type, args->value, &atomic);
read_val = func_sub_return(args->type, args->value,
&atomic);
} else if (args->op == op_sub_and_test) {
delta_magnitude = args->value;
check_result_bool = true;
result_bool = func_sub_and_test(args->type,
args->value, &atomic);
read_val = func_read(args->type, &atomic);
} else {
unit_return_fail(m, "Test error: invalid op in %s\n",
__func__);
}
expected_val = args->start_val +
(i * delta_magnitude * ATOMIC_OP_SIGN(args->op));
/* sanity check */
if ((args->type == ATOMIC_32) &&
((expected_val > INT_MAX) || (expected_val < INT_MIN))) {
unit_return_fail(m, "Test error: invalid value in %s\n",
__func__);
}
if (read_val != expected_val) {
unit_return_fail(m, "Atomic returned wrong value. "
"Expected: %ld Received: %ld\n",
(long)expected_val, (long)read_val);
}
if (check_result_bool) {
if (((expected_val == 0) && !result_bool) ||
((expected_val != 0) && result_bool)) {
unit_return_fail(m,
"Test result incorrect\n");
}
}
}
return UNIT_SUCCESS;
}
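/*
 * Atomic increment built from cmpxchg: read the current value, then retry
 * the compare-and-exchange until no other thread has changed the value
 * between the read and the swap.
 */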
static void cmpxchg_inc(enum atomic_type type, struct atomic_struct *ref)
{
bool done = false;
long old;
while (!done) {
old = func_read(type, ref);
if (old == func_cmpxchg(type, ref, old, old + 1)) {
done = true;
}
}
}
/*
* Support function that runs in the threads for the arithmetic threaded
* test below
*/
static void *arithmetic_thread(void *__args)
{
struct atomic_thread_info *targs = (struct atomic_thread_info *)__args;
unsigned int i;
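/* line up all the workers so their loops run concurrently */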
pthread_barrier_wait(&thread_barrier);
for (i = 0; i < targs->margs->loop_count; i++) {
if (targs->margs->op == op_cmpxchg) {
/* special case with special function */
cmpxchg_inc(targs->margs->type, targs->atomic);
} else if (targs->margs->op == op_inc) {
func_inc(targs->margs->type, targs->atomic);
} else if (targs->margs->op == op_dec) {
func_dec(targs->margs->type, targs->atomic);
} else if (targs->margs->op == op_add) {
/*
* Save the last value so the main thread can sanity check
* that the threads didn't run sequentially.
*/
targs->final_val = func_add_return(
targs->margs->type,
targs->margs->value,
targs->atomic);
} else if (targs->margs->op == op_sub) {
func_sub(targs->margs->type, targs->margs->value,
targs->atomic);
} else if (targs->margs->op == op_inc_and_test) {
if (func_inc_and_test(targs->margs->type,
targs->atomic)) {
/*
* Only increment if atomic op returns true
* (that the value is 0)
*/
targs->iterations++;
}
} else if (targs->margs->op == op_dec_and_test) {
if (func_dec_and_test(targs->margs->type,
targs->atomic)) {
/*
* Only increment if atomic op returns true
* (that the value is 0)
*/
targs->iterations++;
}
} else if (targs->margs->op == op_sub_and_test) {
if (func_sub_and_test(targs->margs->type,
targs->margs->value,
targs->atomic)) {
/*
* Only increment if atomic op returns true
* (that the value is 0)
*/
targs->iterations++;
}
} else if (targs->margs->op == op_add_unless) {
if (func_add_unless(targs->margs->type,
targs->atomic, targs->margs->value,
targs->unless) != targs->unless) {
/*
* Increment until the atomic value is the
* "unless" value.
*/
targs->iterations++;
}
} else {
/*
* Don't print an error here because it would print
* for each thread. The main thread will catch this.
*/
break;
}
}
return NULL;
}
/*
* Support function to make sure the threaded arithmetic tests ran the correct
* number of iterations across threads, if applicable.
*/
static bool correct_thread_iteration_count(struct unit_module *m,
struct atomic_thread_info *threads,
unsigned int num_threads,
long expected_iterations)
{
unsigned int i;
long total_iterations = 0;
for (i = 0; i < num_threads; i++) {
total_iterations += threads[i].iterations;
}
if (total_iterations != expected_iterations) {
unit_err(m, "threaded test op took unexpected number of "
"iterations expected %ld took: %ld\n",
expected_iterations, total_iterations);
return false;
}
return true;
}
/*
* Test arithmetic operations in threads to verify atomicity.
*
* Sets initial start value
* Kicks off threads to loop running ops
* When threads finish loops, verify values
*
* With the ops that have a return, save the final value for each thread and
* use that to try to ensure that the threads aren't executing sequentially.
*/
static int test_atomic_arithmetic_threaded(struct unit_module *m,
struct gk20a *g, void *__args)
{
struct atomic_test_args *args = (struct atomic_test_args *)__args;
struct atomic_struct atomic = {0};
const unsigned int num_threads = 100;
struct atomic_thread_info threads[num_threads];
unsigned int i;
long expected_val, val, expected_iterations;
int ret = UNIT_SUCCESS;
if (single_set_and_read(m, &atomic, args->type, args->start_val)
!= UNIT_SUCCESS) {
return UNIT_FAIL;
}
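/* only the worker threads wait on this barrier, so count num_threads */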
pthread_barrier_init(&thread_barrier, NULL, num_threads);
/* setup and start threads */
for (i = 0; i < num_threads; i++) {
threads[i].atomic = &atomic;
threads[i].margs = args;
threads[i].thread_num = i;
threads[i].iterations = 0;
/* For add_unless, stop adding once the atomic reaches this value */
threads[i].unless = args->start_val +
(num_threads * args->loop_count / 2);
pthread_create(&threads[i].thread, NULL, arithmetic_thread,
&threads[i]);
}
/* wait for all threads to complete */
for (i = 0; i < num_threads; i++) {
pthread_join(threads[i].thread, NULL);
}
val = func_read(args->type, &atomic);
switch (args->op) {
case op_add_unless:
/*
* For add_unless, the threads increment their iteration
* counts until the atomic reaches the unless value,
* but continue calling the op in the loop to make sure
* it doesn't actually add anymore.
*/
expected_iterations = (threads[0].unless -
args->start_val + 1) /
args->value;
if (!correct_thread_iteration_count(m, threads,
num_threads, expected_iterations)) {
ret = UNIT_FAIL;
goto exit;
}
expected_val = threads[0].unless;
break;
case op_inc_and_test:
case op_dec_and_test:
case op_sub_and_test:
/*
* The threads only increment when the atomic op
* reports that it hit 0 which should only happen once.
*/
if (!correct_thread_iteration_count(m, threads,
num_threads, 1)) {
ret = UNIT_FAIL;
goto exit;
}
/* fall through! */
case op_add:
case op_sub:
case op_inc:
case op_dec:
case op_cmpxchg:
expected_val = args->start_val +
(args->loop_count * num_threads *
ATOMIC_OP_SIGN(args->op) * args->value);
break;
default:
unit_err(m, "Test error: invalid op in %s\n", __func__);
ret = UNIT_FAIL;
goto exit;
}
/* sanity check */
if ((args->type == ATOMIC_32) &&
((expected_val > INT_MAX) || (expected_val < INT_MIN))) {
unit_err(m, "Test error: invalid value in %s\n", __func__);
ret = UNIT_FAIL;
goto exit;
}
if (val != expected_val) {
unit_err(m, "threaded value incorrect expected: %ld "
"result: %ld\n",
expected_val, val);
ret = UNIT_FAIL;
goto exit;
}
if (args->op == op_add) {
/* sanity test that the threads aren't all sequential */
bool sequential = true;
for (i = 0; i < (num_threads - 1); i++) {
if (labs(threads[i].final_val - threads[i+1].final_val)
!= (long)args->loop_count) {
sequential = false;
break;
}
}
if (sequential) {
unit_err(m, "threads appear to have run "
"sequentially!\n");
ret = UNIT_FAIL;
goto exit;
}
}
exit:
pthread_barrier_destroy(&thread_barrier);
if (args->type == NOT_ATOMIC) {
/* For the non-atomics, pass is fail and fail is pass */
return INVERTED_RESULT(ret);
} else {
return ret;
}
}
/*
* Thread function for the test_atomic_arithmetic_and_test_threaded() test.
* Calls the *_and_test op once per round and records whether the op returned
* true by incrementing the iterations count in the thread struct.
*/
static void *arithmetic_and_test_updater_thread(void *__args)
{
struct atomic_thread_info *targs = (struct atomic_thread_info *)__args;
struct atomic_struct *atomic_p = targs->atomic;
bool is_zero;
while (true) {
/* wait here to start */
pthread_barrier_wait(&thread_barrier);
if (stop_threads) {
return NULL;
}
switch (targs->margs->op) {
case op_inc_and_test:
is_zero = func_inc_and_test(targs->margs->type,
atomic_p);
break;
case op_dec_and_test:
is_zero = func_dec_and_test(targs->margs->type,
atomic_p);
break;
case op_sub_and_test:
is_zero = func_sub_and_test(targs->margs->type,
targs->margs->value, atomic_p);
break;
default:
/* designate failure */
is_zero = false;
break;
}
if (is_zero) {
/*
* Only count iterations where the op says the value
* is 0
*/
targs->iterations++;
}
/* wait until everyone finishes this iteration */
pthread_barrier_wait(&thread_barrier);
}
return NULL;
}
/*
* Test arithmetic *_and_test functions in threads to verify atomicity
*
* Set the atomic to a value to allow the arithmetic op to pass 0.
* Start a lot of threads that will each execute the atomic op once.
* Check iteration count to make sure only one thread saw 0.
* Note: the final value isn't verified because this test targets the
* atomicity of the combined operation and test, and the non-atomic case
* may corrupt the final value before the behavior under test fails.
* Repeat until reaching the input argument repeat_count or seeing a failure.
*/
static int test_atomic_arithmetic_and_test_threaded(struct unit_module *m,
struct gk20a *g, void *__args)
{
struct atomic_test_args *args = (struct atomic_test_args *)__args;
struct atomic_struct atomic = {0};
const int num_threads = 100;
/* Start the atomic so the ops cross 0 partway through the round */
const long start_val = 0 - (ATOMIC_OP_SIGN(args->op) * num_threads / 2);
struct atomic_thread_info threads[num_threads];
int i;
unsigned int repeat = args->repeat_count;
int result = UNIT_SUCCESS;
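/* num_threads + 1: the main thread joins the barrier to start/stop rounds */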
pthread_barrier_init(&thread_barrier, NULL, num_threads + 1);
stop_threads = false;
do {
if (single_set_and_read(m, &atomic, args->type, start_val) !=
UNIT_SUCCESS) {
return UNIT_FAIL;
}
/* setup threads */
for (i = 0; i < num_threads; i++) {
threads[i].iterations = 0;
if (repeat == args->repeat_count) {
threads[i].atomic = &atomic;
threads[i].margs = args;
threads[i].thread_num = i;
pthread_create(&threads[i].thread, NULL,
arithmetic_and_test_updater_thread,
&threads[i]);
}
}
/* start threads */
pthread_barrier_wait(&thread_barrier);
/* wait for all threads to complete */
pthread_barrier_wait(&thread_barrier);
/*
* The threads only count iterations where the test func
* returns true. So, this should only happen once.
*/
if (!correct_thread_iteration_count(m, threads,
num_threads, 1)) {
result = UNIT_FAIL;
break;
}
} while (repeat-- > 0);
/* signal the end to the threads, then wake them */
stop_threads = true;
pthread_barrier_wait(&thread_barrier);
/* wait for all threads to exit */
for (i = 0; i < num_threads; i++) {
pthread_join(threads[i].thread, NULL);
}
pthread_barrier_destroy(&thread_barrier);
if (args->type == NOT_ATOMIC) {
/* For the non-atomics, pass is fail and fail is pass */
return INVERTED_RESULT(result);
} else {
return result;
}
}
/*
* Test the xchg op single-threaded for proper functionality
*
* Loops calling the xchg op with different values, making sure the returned
* value is the last one written.
*/
static int test_atomic_xchg(struct unit_module *m,
struct gk20a *g, void *__args)
{
struct atomic_test_args *args = (struct atomic_test_args *)__args;
struct atomic_struct atomic = {0};
unsigned int i;
long new_val, old_val, ret_val;
if (single_set_and_read(m, &atomic, args->type, args->start_val)
!= UNIT_SUCCESS) {
return UNIT_FAIL;
}
old_val = args->start_val;
for (i = 0; i < args->loop_count; i++) {
/*
* alternate positive and negative values while increasing
* based on the loop counter
*/
new_val = (i % 2 ? 1 : -1) * (args->start_val + i);
ret_val = func_xchg(args->type, &atomic, new_val);
if (ret_val != old_val) {
unit_return_fail(m, "xchg returned bad old val "
"Expected: %ld, Received: %ld\n",
old_val, ret_val);
}
old_val = new_val;
}
return UNIT_SUCCESS;
}
/*
* Function to do xchg operation for the test_atomic_xchg_threaded() test
*
* Each thread will run a for loop which will xchg its value with the atomic
* See the main test for more details
*/
static void *xchg_thread(void *__args)
{
struct atomic_thread_info *targs = (struct atomic_thread_info *)__args;
unsigned int i;
while (true) {
/* wait here to start iteration */
pthread_barrier_wait(&thread_barrier);
if (stop_threads) {
return NULL;
}
for (i = 0; i < 1000; i++) {
targs->xchg_val = func_xchg(targs->margs->type,
targs->atomic, targs->xchg_val);
}
/* wait until everyone finishes this iteration */
pthread_barrier_wait(&thread_barrier);
}
return NULL;
}
/*
* Test atomic exchange operation
*
* Set the atomic to a starting value.
* Setup and start the exchange threads.
* Setup includes setting each thread's "xchg_val" to its thread number.
* When threads complete, loop through the thread's xchg_val and make sure
* each number is unique and someone still has the starting value.
*/
static int test_atomic_xchg_threaded(struct unit_module *m,
struct gk20a *g, void *__args)
{
struct atomic_test_args *args = (struct atomic_test_args *)__args;
struct atomic_struct atomic = {0};
const unsigned int num_threads = 100;
struct atomic_thread_info threads[num_threads];
unsigned int i;
unsigned int repeat = args->repeat_count;
int result = UNIT_SUCCESS;
const long start_val = -999;
bool start_val_present;
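/* the main thread participates in the barrier, hence num_threads + 1 */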
pthread_barrier_init(&thread_barrier, NULL, num_threads + 1);
stop_threads = false;
do {
/* start at -999 */
if (single_set_and_read(m, &atomic, args->type, start_val) !=
UNIT_SUCCESS) {
result = UNIT_FAIL;
goto exit;
}
/* setup threads */
for (i = 0; i < num_threads; i++) {
threads[i].iterations = 0;
threads[i].xchg_val = i;
if (repeat == args->repeat_count) {
threads[i].atomic = &atomic;
threads[i].margs = args;
threads[i].thread_num = i;
pthread_create(&threads[i].thread, NULL,
xchg_thread, &threads[i]);
}
}
/* start threads */
pthread_barrier_wait(&thread_barrier);
/* wait for all threads to complete */
pthread_barrier_wait(&thread_barrier);
start_val_present = false;
for (i = 0; i < num_threads; i++) {
unsigned int j;
if (threads[i].xchg_val == start_val) {
start_val_present = true;
}
for (j = (i + 1); j < num_threads; j++) {
if (threads[i].xchg_val ==
threads[j].xchg_val) {
unit_err(m, "duplicate value\n");
result = UNIT_FAIL;
goto exit;
}
}
}
if ((func_read(args->type, &atomic) != start_val) &&
!start_val_present) {
unit_err(m, "start value no present\n");
result = UNIT_FAIL;
goto exit;
}
} while (repeat-- > 0);
exit:
/* signal the end to the threads, then wake them */
stop_threads = true;
pthread_barrier_wait(&thread_barrier);
/* wait for all threads to exit */
for (i = 0; i < num_threads; i++) {
pthread_join(threads[i].thread, NULL);
}
pthread_barrier_destroy(&thread_barrier);
if (args->type == NOT_ATOMIC) {
/* For the non-atomics, pass is fail and fail is pass */
return INVERTED_RESULT(result);
} else {
return result;
}
}
/*
* Test cmpxchg single-threaded for proper functionality
*
* Loops calling cmpxchg, alternating between matching and not matching.
* Verify correct behavior for each call.
*/
static int test_atomic_cmpxchg(struct unit_module *m,
struct gk20a *g, void *__args)
{
struct atomic_test_args *args = (struct atomic_test_args *)__args;
struct atomic_struct atomic = {0};
const int switch_interval = 5;
unsigned int i;
long new_val, old_val, ret_val;
bool should_match = true;
if (single_set_and_read(m, &atomic, args->type, args->start_val)
!= UNIT_SUCCESS) {
return UNIT_FAIL;
}
old_val = args->start_val;
for (i = 0; i < args->loop_count; i++) {
/*
* alternate whether the cmp should match each
* switch_interval
*/
if ((i % switch_interval) == 0) {
should_match = !should_match;
}
new_val = args->start_val + i;
if (should_match) {
ret_val = func_cmpxchg(args->type, &atomic,
old_val, new_val);
if (ret_val != old_val) {
unit_return_fail(m,
"cmpxchg returned bad old val "
"Expected: %ld, Received: %ld\n",
old_val, ret_val);
}
ret_val = func_read(args->type, &atomic);
if (ret_val != new_val) {
unit_return_fail(m,
"cmpxchg did not update "
"Expected: %ld, Received: %ld\n",
new_val, ret_val);
}
old_val = new_val;
} else {
ret_val = func_cmpxchg(args->type, &atomic,
-1 * old_val, new_val);
if (ret_val != old_val) {
unit_return_fail(m,
"cmpxchg returned bad old val "
"Expected: %ld, Received: %ld\n",
old_val, ret_val);
}
ret_val = func_read(args->type, &atomic);
if (ret_val != old_val) {
unit_return_fail(m,
"cmpxchg should not have updated "
"Expected: %ld, Received: %ld\n",
old_val, ret_val);
}
}
}
return UNIT_SUCCESS;
}
/*
* Test the add_unless op single-threaded for proper functionality
*
* Loops through calling the operation, alternating whether the add should
* occur or not (i.e. changing the "unless" value).
* Verify correct behavior for each operation.
*/
static int test_atomic_add_unless(struct unit_module *m,
struct gk20a *g, void *__args)
{
struct atomic_test_args *args = (struct atomic_test_args *)__args;
struct atomic_struct atomic = {0};
const int switch_interval = 5;
unsigned int i;
long new_val, old_val, ret_val;
bool should_update = true;
if (single_set_and_read(m, &atomic, args->type, args->start_val)
!= UNIT_SUCCESS) {
return UNIT_FAIL;
}
old_val = args->start_val;
for (i = 0; i < args->loop_count; i++) {
/* alternate whether add should occur every switch_interval */
if ((i % switch_interval) == 0) {
should_update = !should_update;
}
if (should_update) {
/* This will fail to match and do the add */
ret_val = func_add_unless(args->type, &atomic,
args->value, old_val - 1);
if (ret_val != old_val) {
unit_return_fail(m,
"add_unless returned bad old val "
"Expected: %d, Received: %d\n",
old_val, ret_val);
}
new_val = old_val + args->value;
ret_val = func_read(args->type, &atomic);
if (ret_val != new_val) {
unit_return_fail(m, "add_unless did not "
"update Expected: %d, "
"Received: %d\n",
new_val, ret_val);
}
old_val = ret_val;
} else {
/* This will match the old value and won't add */
ret_val = func_add_unless(args->type, &atomic,
args->value, old_val);
if (ret_val != old_val) {
unit_return_fail(m,
"add_unless returned bad old val "
"Expected: %d, Received: %d\n",
old_val, ret_val);
}
ret_val = func_read(args->type, &atomic);
if (ret_val != old_val) {
unit_return_fail(m, "add_unless should not "
"have updated Expected: %d, "
"Received: %d\n",
old_val, ret_val);
}
}
}
return UNIT_SUCCESS;
}
static struct atomic_test_args set_and_read_32_arg = {
.type = ATOMIC_32,
};
static struct atomic_test_args set_and_read_64_arg = {
.type = ATOMIC_64,
};
static struct atomic_test_args inc_32_arg = {
.op = op_inc,
.type = ATOMIC_32,
.start_val = -500,
.loop_count = 10000,
.value = 1,
};
static struct atomic_test_args inc_and_test_not_atomic_arg = {
/* must cross 0 */
.op = op_inc_and_test,
.type = NOT_ATOMIC,
.start_val = -500,
.loop_count = 10000,
.value = 1,
.repeat_count = 5000, /* for threaded test */
};
static struct atomic_test_args inc_and_test_32_arg = {
/* must cross 0 */
.op = op_inc_and_test,
.type = ATOMIC_32,
.start_val = -500,
.loop_count = 10000,
.value = 1,
.repeat_count = 5000, /* for threaded test */
};
static struct atomic_test_args inc_and_test_64_arg = {
/* must cross 0 */
.op = op_inc_and_test,
.type = ATOMIC_64,
.start_val = -500,
.loop_count = 10000,
.value = 1,
.repeat_count = 5000, /* for threaded test */
};
static struct atomic_test_args inc_64_arg = {
.op = op_inc,
.type = ATOMIC_64,
.start_val = INT_MAX - 500,
.loop_count = 10000,
.value = 1,
};
static struct atomic_test_args dec_32_arg = {
.op = op_dec,
.type = ATOMIC_32,
.start_val = 500,
.loop_count = 10000,
.value = 1,
};
static struct atomic_test_args dec_and_test_not_atomic_arg = {
/* must cross 0 */
.op = op_dec_and_test,
.type = NOT_ATOMIC,
.start_val = 500,
.loop_count = 10000,
.value = 1,
.repeat_count = 5000, /* for threaded test */
};
static struct atomic_test_args dec_and_test_32_arg = {
/* must cross 0 */
.op = op_dec_and_test,
.type = ATOMIC_32,
.start_val = 500,
.loop_count = 10000,
.value = 1,
.repeat_count = 5000, /* for threaded test */
};
static struct atomic_test_args dec_and_test_64_arg = {
/* must cross 0 */
.op = op_dec_and_test,
.type = ATOMIC_64,
.start_val = 500,
.loop_count = 10000,
.value = 1,
.repeat_count = 5000, /* for threaded test */
};
static struct atomic_test_args dec_64_arg = {
.op = op_dec,
.type = ATOMIC_64,
.start_val = INT_MIN + 500,
.loop_count = 10000,
.value = 1,
};
static struct atomic_test_args add_32_arg = {
.op = op_add,
.type = ATOMIC_32,
.start_val = -500,
.loop_count = 10000,
.value = 7,
};
static struct atomic_test_args add_64_arg = {
.op = op_add,
.type = ATOMIC_64,
.start_val = INT_MAX - 500,
.loop_count = 10000,
.value = 7,
};
static struct atomic_test_args sub_32_arg = {
.op = op_sub,
.type = ATOMIC_32,
.start_val = 500,
.loop_count = 10000,
.value = 7,
};
static struct atomic_test_args sub_64_arg = {
.op = op_sub,
.type = ATOMIC_64,
.start_val = INT_MIN + 500,
.loop_count = 10000,
.value = 7,
};
static struct atomic_test_args sub_and_test_not_atomic_arg = {
/* must cross 0 */
.op = op_sub_and_test,
.type = NOT_ATOMIC,
.start_val = 500,
.loop_count = 10000,
.value = 5,
.repeat_count = 5000, /* for threaded test */
};
static struct atomic_test_args sub_and_test_32_arg = {
/* must cross 0 */
.op = op_sub_and_test,
.type = ATOMIC_32,
.start_val = 500,
.loop_count = 10000,
.value = 5,
.repeat_count = 5000, /* for threaded test */
};
static struct atomic_test_args sub_and_test_64_arg = {
/* must cross 0 */
.op = op_sub_and_test,
.type = ATOMIC_64,
.start_val = 500,
.loop_count = 10000,
.value = 5,
.repeat_count = 5000, /* for threaded test */
};
static struct atomic_test_args xchg_not_atomic_arg = {
.op = op_cmpxchg,
.type = NOT_ATOMIC,
.start_val = 1,
.value = 1,
.loop_count = 10000,
.repeat_count = 10000, /* for threaded test */
};
static struct atomic_test_args xchg_32_arg = {
.op = op_cmpxchg,
.type = ATOMIC_32,
.start_val = 1,
.value = 1,
.loop_count = 10000,
.repeat_count = 10000, /* for threaded test */
};
static struct atomic_test_args xchg_64_arg = {
.op = op_cmpxchg,
.type = ATOMIC_64,
.start_val = INT_MAX,
.value = 1,
.loop_count = 10000,
.repeat_count = 2000, /* for threaded test */
};
static struct atomic_test_args add_unless_32_arg = {
/* must loop at least 10 times */
.op = op_add_unless,
.type = ATOMIC_32,
.start_val = -500,
.loop_count = 10000,
.value = 5,
};
static struct atomic_test_args add_unless_64_arg = {
/* must loop at least 10 times */
.op = op_add_unless,
.type = ATOMIC_64,
.start_val = -500,
.loop_count = 10000,
.value = 5,
};
struct unit_module_test atomic_tests[] = {
/* Level 0 tests */
UNIT_TEST(atomic_set_and_read_32, test_atomic_set_and_read, &set_and_read_32_arg, 0),
UNIT_TEST(atomic_set_and_read_64, test_atomic_set_and_read, &set_and_read_64_arg, 0),
UNIT_TEST(atomic_inc_32, test_atomic_arithmetic, &inc_32_arg, 0),
UNIT_TEST(atomic_inc_and_test_32, test_atomic_arithmetic, &inc_and_test_32_arg, 0),
UNIT_TEST(atomic_inc_and_test_64, test_atomic_arithmetic, &inc_and_test_64_arg, 0),
UNIT_TEST(atomic_inc_64, test_atomic_arithmetic, &inc_64_arg, 0),
UNIT_TEST(atomic_dec_32, test_atomic_arithmetic, &dec_32_arg, 0),
UNIT_TEST(atomic_dec_64, test_atomic_arithmetic, &dec_64_arg, 0),
UNIT_TEST(atomic_dec_and_test_32, test_atomic_arithmetic, &dec_and_test_32_arg, 0),
UNIT_TEST(atomic_dec_and_test_64, test_atomic_arithmetic, &dec_and_test_64_arg, 0),
UNIT_TEST(atomic_add_32, test_atomic_arithmetic, &add_32_arg, 0),
UNIT_TEST(atomic_add_64, test_atomic_arithmetic, &add_64_arg, 0),
UNIT_TEST(atomic_sub_32, test_atomic_arithmetic, &sub_32_arg, 0),
UNIT_TEST(atomic_sub_64, test_atomic_arithmetic, &sub_64_arg, 0),
UNIT_TEST(atomic_sub_and_test_32, test_atomic_arithmetic, &sub_and_test_32_arg, 0),
UNIT_TEST(atomic_sub_and_test_64, test_atomic_arithmetic, &sub_and_test_64_arg, 0),
UNIT_TEST(atomic_xchg_32, test_atomic_xchg, &xchg_32_arg, 0),
UNIT_TEST(atomic_xchg_64, test_atomic_xchg, &xchg_64_arg, 0),
UNIT_TEST(atomic_cmpxchg_32, test_atomic_cmpxchg, &xchg_32_arg, 0),
UNIT_TEST(atomic_cmpxchg_64, test_atomic_cmpxchg, &xchg_64_arg, 0),
UNIT_TEST(atomic_add_unless_32, test_atomic_add_unless, &add_unless_32_arg, 0),
UNIT_TEST(atomic_add_unless_64, test_atomic_add_unless, &add_unless_64_arg, 0),
UNIT_TEST(atomic_inc_32_threaded, test_atomic_arithmetic_threaded, &inc_32_arg, 0),
UNIT_TEST(atomic_inc_64_threaded, test_atomic_arithmetic_threaded, &inc_64_arg, 0),
UNIT_TEST(atomic_dec_32_threaded, test_atomic_arithmetic_threaded, &dec_32_arg, 0),
UNIT_TEST(atomic_dec_64_threaded, test_atomic_arithmetic_threaded, &dec_64_arg, 0),
UNIT_TEST(atomic_add_32_threaded, test_atomic_arithmetic_threaded, &add_32_arg, 0),
UNIT_TEST(atomic_add_64_threaded, test_atomic_arithmetic_threaded, &add_64_arg, 0),
UNIT_TEST(atomic_sub_32_threaded, test_atomic_arithmetic_threaded, &sub_32_arg, 0),
UNIT_TEST(atomic_sub_64_threaded, test_atomic_arithmetic_threaded, &sub_64_arg, 0),
UNIT_TEST(atomic_cmpxchg_not_atomic_threaded, test_atomic_arithmetic_threaded, &xchg_not_atomic_arg, 0),
UNIT_TEST(atomic_cmpxchg_32_threaded, test_atomic_arithmetic_threaded, &xchg_32_arg, 0),
UNIT_TEST(atomic_cmpxchg_64_threaded, test_atomic_arithmetic_threaded, &xchg_64_arg, 0),
/* Level 1 tests */
UNIT_TEST(atomic_inc_and_test_not_atomic_threaded, test_atomic_arithmetic_and_test_threaded, &inc_and_test_not_atomic_arg, 1),
UNIT_TEST(atomic_inc_and_test_32_threaded, test_atomic_arithmetic_and_test_threaded, &inc_and_test_32_arg, 1),
UNIT_TEST(atomic_inc_and_test_64_threaded, test_atomic_arithmetic_and_test_threaded, &inc_and_test_64_arg, 1),
UNIT_TEST(atomic_dec_and_test_not_atomic_threaded, test_atomic_arithmetic_and_test_threaded, &dec_and_test_not_atomic_arg, 1),
UNIT_TEST(atomic_dec_and_test_32_threaded, test_atomic_arithmetic_and_test_threaded, &dec_and_test_32_arg, 1),
UNIT_TEST(atomic_dec_and_test_64_threaded, test_atomic_arithmetic_and_test_threaded, &dec_and_test_64_arg, 1),
UNIT_TEST(atomic_sub_and_test_not_atomic_threaded, test_atomic_arithmetic_and_test_threaded, &sub_and_test_not_atomic_arg, 1),
UNIT_TEST(atomic_sub_and_test_32_threaded, test_atomic_arithmetic_and_test_threaded, &sub_and_test_32_arg, 1),
UNIT_TEST(atomic_sub_and_test_64_threaded, test_atomic_arithmetic_and_test_threaded, &sub_and_test_64_arg, 1),
UNIT_TEST(atomic_add_unless_32_threaded, test_atomic_arithmetic_threaded, &add_unless_32_arg, 1),
UNIT_TEST(atomic_add_unless_64_threaded, test_atomic_arithmetic_threaded, &add_unless_64_arg, 1),
UNIT_TEST(atomic_xchg_not_atomic_threaded, test_atomic_xchg_threaded, &xchg_not_atomic_arg, 1),
UNIT_TEST(atomic_xchg_32_threaded, test_atomic_xchg_threaded, &xchg_32_arg, 1),
UNIT_TEST(atomic_xchg_64_threaded, test_atomic_xchg_threaded, &xchg_64_arg, 1),
};
UNIT_MODULE(atomic, atomic_tests, UNIT_PRIO_POSIX_TEST);