std::atomic

在单生产者单消费者的环形队列缓冲模型中，消费线程只修改readPos_，生产线程只修改writePos_，两者的写操作互不干扰。但是每个线程都需要读取对方维护的下标，所以我们仍然需要保证readPos_和writePos_读写的原子性。一种选择是使用C++的std::atomic模板类型。但是这种做法对性能的影响是怎么样的呢？带着这个问题，我做了一下测试。

测试环境：

Ubuntu 16.04虚拟机
主存频率：1600MHz

readPos和writePos分别是内置类型size_t的时候

对一个std::size_t类型的变量进行1亿次++操作。

#include <time.h>
#include <stdio.h>

/**
 * Returns the current reading of the monotonic clock, in nanoseconds.
 *
 * CLOCK_MONOTONIC is used instead of CLOCK_REALTIME because the result is only
 * ever used to compute elapsed intervals: the wall clock can be stepped
 * (NTP, manual adjustment, VM time sync) in the middle of a measurement,
 * which would corrupt the difference between two readings.
 *
 * @return nanoseconds since an unspecified starting point; only the
 *         difference between two calls is meaningful.
 */
unsigned long long time_ns()
{
    // Zero-initialise so a failed clock_gettime() cannot leak garbage.
    struct timespec ts = {};
    clock_gettime(CLOCK_MONOTONIC, &ts);
    // Widen before multiplying so the arithmetic is done in 64 bits even
    // where time_t is narrower.
    return static_cast<unsigned long long>(ts.tv_sec) * 1000000000ULL
         + static_cast<unsigned long long>(ts.tv_nsec);
}

/**
 * Baseline benchmark: times 100 million post-increments of a plain
 * (non-atomic) size_t and prints the total and the per-operation cost.
 *
 * NOTE(review): `a` is never read after the loop, so an optimizing build is
 * free to remove the loop entirely; the timing is presumably meant for an
 * unoptimized build — confirm the compiler flags used.
 */
int main()
{
    // Start from a defined value: incrementing an uninitialized variable
    // reads an indeterminate value, which is undefined behavior.
    // The unqualified name is used because only the C headers <time.h> and
    // <stdio.h> are included here, and those are only guaranteed to declare
    // size_t in the global namespace.
    size_t a = 0;
    const long long unsigned TESTTIMES = 100000000;

    const long long unsigned start_ns = time_ns();
    for (long long unsigned i = 0; i < TESTTIMES; i++)
    {
        a++;
    }
    const long long unsigned delta = time_ns() - start_ns;

    // Average in double: float has only ~7 significant digits, which is not
    // enough once the total runs into tens of billions of nanoseconds.
    printf("%llu times tests use %llu ns(%.4f ns each)\n", TESTTIMES, delta,
           static_cast<double>(delta) / static_cast<double>(TESTTIMES));
    return 0;
}

执行结果：

100000000 times tests use 229754536 ns(2.2975 ns each)

readPos和writePos是std::atomic类型

在没有竞争的情况下：

#include <pthread.h>
#include <stdio.h>
#include <time.h>

#include <atomic>
#include <cstddef>
#include <iostream>

/**
 * Returns the current reading of the monotonic clock, in nanoseconds.
 *
 * The benchmark only needs elapsed time, so CLOCK_MONOTONIC is used rather
 * than CLOCK_REALTIME: the wall clock may be adjusted (NTP, manual change,
 * VM time sync) while a measurement is running, which would distort the
 * computed interval.
 *
 * @return nanoseconds since an unspecified starting point; only the
 *         difference between two calls is meaningful.
 */
unsigned long long time_ns()
{
    // Zero-initialise so a failed clock_gettime() cannot leak garbage.
    struct timespec ts = {};
    clock_gettime(CLOCK_MONOTONIC, &ts);
    // Widen before multiplying so the arithmetic is done in 64 bits even
    // where time_t is narrower.
    return static_cast<unsigned long long>(ts.tv_sec) * 1000000000ULL
         + static_cast<unsigned long long>(ts.tv_nsec);
}

/**
 * Uncontended benchmark: a single thread performs 100 million post-increments
 * on a std::atomic<std::size_t> and prints the total and per-operation cost,
 * after reporting whether that atomic is lock-free.
 */
int main()
{
    // Explicitly start at zero: before C++20 a default-constructed
    // std::atomic holds an indeterminate value.
    std::atomic<std::size_t> a{0};

    // Query the object that is actually measured rather than a temporary.
    // '\n' is enough here; std::cout is synchronized with stdio by default,
    // so the ordering relative to the printf below is preserved.
    std::cout << std::boolalpha
              << "std::atomic<std::size_t> is lock_free? "
              << a.is_lock_free() << '\n';

    const long long unsigned TESTTIMES = 100000000;
    const long long unsigned start_ns = time_ns();
    for (long long unsigned i = 0; i < TESTTIMES; i++)
    {
        a++;  // sequentially consistent atomic read-modify-write
    }
    const long long unsigned delta = time_ns() - start_ns;

    // Average in double: float has only ~7 significant digits.
    printf("%llu times tests use %llu ns(%.4f ns each)\n", TESTTIMES, delta,
           static_cast<double>(delta) / static_cast<double>(TESTTIMES));
    return 0;
}

执行结果：

std::atomic<std::size_t> is lock_free? true
100000000 times tests use 985817387 ns(9.8582 ns each)

现在我们开启另外一个线程，两个线程同时对a进行++操作，各执行1亿次，使其有竞争出现。


/**
 * Worker-thread entry point: performs 100 million atomic post-increments on
 * the shared counter and prints this thread's total and per-operation time.
 *
 * @param p address of the std::atomic<std::size_t> counter that main()
 *          passes to pthread_create
 * @return always nullptr
 */
void *threadFun(void* p)
{
    // Cast back to exactly the type main() passed in. std::size_t and
    // unsigned long long are distinct types on common 64-bit Linux ABIs even
    // though they have the same width, so going through
    // atomic<unsigned long long> would access the object through the wrong
    // type.
    std::atomic<std::size_t>& a = *static_cast<std::atomic<std::size_t>*>(p);

    const long long unsigned TESTTIMES = 100000000;
    const long long unsigned start_ns = time_ns();
    for (long long unsigned i = 0; i < TESTTIMES; i++)
    {
        a++;
    }
    const long long unsigned delta = time_ns() - start_ns;

    // Average in double: float has only ~7 significant digits.
    printf("%llu times tests use %llu ns(%.4f ns each)\n", TESTTIMES, delta,
           static_cast<double>(delta) / static_cast<double>(TESTTIMES));

    // A function returning void* must return a value; flowing off the end
    // is undefined behavior.
    return nullptr;
}

/**
 * Contended benchmark: the main thread and one worker thread (threadFun)
 * each perform 100 million post-increments on the same
 * std::atomic<std::size_t>. Each thread prints its own timing; main prints
 * the final counter value after joining the worker.
 *
 * @return 0 on success, 1 if the worker thread could not be created
 */
int main()
{
    std::atomic<std::size_t> a{0};

    // Report lock-freedom before the worker exists, so that console I/O does
    // not eat into the window in which both threads are supposed to contend.
    std::cout << std::boolalpha
              << "std::atomic<std::size_t> is lock_free? "
              << a.is_lock_free() << '\n';

    pthread_t t;
    if (pthread_create(&t, nullptr, threadFun, &a) != 0)
    {
        // Without the second thread there is no contention to measure.
        fprintf(stderr, "pthread_create failed\n");
        return 1;
    }

    const long long unsigned TESTTIMES = 100000000;
    const long long unsigned start_ns = time_ns();
    for (long long unsigned i = 0; i < TESTTIMES; i++)
    {
        a++;
    }
    const long long unsigned delta = time_ns() - start_ns;

    // Average in double: float has only ~7 significant digits.
    printf("%llu times tests use %llu ns(%.4f ns each)\n", TESTTIMES, delta,
           static_cast<double>(delta) / static_cast<double>(TESTTIMES));

    pthread_join(t, nullptr);
    // Final value; both threads add TESTTIMES, so 2 * TESTTIMES is expected.
    std::cout << a.load() << '\n';
    return 0;
}

执行结果：

std::atomic<std::size_t> is lock_free? true
100000000 times tests use 1695992960 ns(16.9599 ns each)
100000000 times tests use 1859780698 ns(18.5978 ns each)
200000000

atomic模板类型不是lock_free

我们定义一个Test类，该类型如果作为atomic的模板参数，则不是lock_free。

// Deliberately oversized payload (100 ints) used as the template argument of
// std::atomic in the final experiment; the recorded run reports
// std::atomic<Test> as not lock-free.
struct Test
{
    int b[100];  // only b[0] is touched by the benchmark
};

void *threadFun(void* p)
{
    std::atomic<Test>& a = *(static_cast<atomic<Test>*>(p));    
    const long long unsigned TESTTIMES = 100000000;
    long long unsigned start_ns = time_ns();
    for(long long unsigned i = 0; i < TESTTIMES; i++)
    {
        ((Test)a).b[0]++;
    }
    long long unsigned delta = time_ns()-start_ns;
    printf("%llu times tests use %llu ns(%.4f ns each)\n", TESTTIMES, delta, (delta)/(float)TESTTIMES);
}

int main()
{
    std::atomic<Test> a;
    ((Test)a).b[0] = 0;
    pthread_t t;
    pthread_create(&t, NULL, threadFun, (void*)&a);
    
    std::cout << std::boolalpha 
        << "std::atomic<Test> is lock_free? "
        << std::atomic<Test>().is_lock_free() << endl;

    const long long unsigned TESTTIMES = 100000000;
    long long unsigned start_ns = time_ns();
    for(long long unsigned i = 0; i < TESTTIMES; i++)
    {
        ((Test)a).b[0]++;
    }
    long long unsigned delta = time_ns()-start_ns;
    printf("%llu times tests use %llu ns(%.4f ns each)\n", TESTTIMES, delta, (delta)/(float)TESTTIMES);
    pthread_join(t, NULL);
    cout << ((Test)a).b[0] << endl;
}

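执行结果（注意：这组结果来自((Test)a).b[0]++的写法。(Test)a得到的是原子对象的一份临时副本，++只作用在副本上，并没有写回a，而且a本身从未被初始化，所以最后一行不是200000000；这里测到的实际上是每次从非lock_free的atomic中读出整个Test的开销）：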

std::atomic<Test> is lock_free? false
100000000 times tests use 33430659148 ns(334.3066 ns each)
100000000 times tests use 33619378185 ns(336.1938 ns each)
1615295328

可见，使用atomic封装对象时，针对不同的类型，其底层实现手段是不一样的，开销也就不一样。无锁（lock_free）的底层实现与通过加锁来保证原子性的底层实现相比，开销差异非常大，多达几十倍。

回到最开始的问题：如果使用的是std::size_t类型，std::atomic是无锁的。按上面的测试结果，无竞争时每次++约9.9 ns（普通size_t约2.3 ns），两个线程竞争时约17~19 ns。