boost/compute/algorithm/fill.hpp
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//
#ifndef BOOST_COMPUTE_ALGORITHM_FILL_HPP
#define BOOST_COMPUTE_ALGORITHM_FILL_HPP
#include <iterator>
#include <boost/mpl/int.hpp>
#include <boost/mpl/vector.hpp>
#include <boost/mpl/contains.hpp>
#include <boost/utility/enable_if.hpp>
#include <boost/compute/cl.hpp>
#include <boost/compute/system.hpp>
#include <boost/compute/command_queue.hpp>
#include <boost/compute/algorithm/copy.hpp>
#include <boost/compute/async/future.hpp>
#include <boost/compute/iterator/constant_iterator.hpp>
#include <boost/compute/iterator/discard_iterator.hpp>
#include <boost/compute/detail/is_buffer_iterator.hpp>
#include <boost/compute/detail/iterator_range_size.hpp>
namespace boost {
namespace compute {
namespace detail {
namespace mpl = boost::mpl;
// fills the range [first, first + count) with value using copy()
template<class BufferIterator, class T>
inline void fill_with_copy(BufferIterator first,
size_t count,
const T &value,
command_queue &queue)
{
::boost::compute::copy(
::boost::compute::make_constant_iterator(value, 0),
::boost::compute::make_constant_iterator(value, count),
first,
queue
);
}
// Asynchronous variant of fill_with_copy(): fills [first, first + count)
// with value through copy_async() and hands back the future it returns.
template<class BufferIterator, class T>
inline future<void> fill_async_with_copy(BufferIterator first,
                                         size_t count,
                                         const T &value,
                                         command_queue &queue)
{
    typedef ::boost::compute::constant_iterator<T> source_iterator;

    // the source range yields `value` at every position in [0, count)
    const source_iterator source_begin =
        ::boost::compute::make_constant_iterator(value, 0);
    const source_iterator source_end =
        ::boost::compute::make_constant_iterator(value, count);

    return ::boost::compute::copy_async(source_begin, source_end, first, queue);
}
#if defined(CL_VERSION_1_2)
// Meta-function returning true if Iterator points to a range of values
// that can be filled using clEnqueueFillBuffer(). To meet these criteria
// it must have a buffer accessible through iter.get_buffer() and the
// size of its value_type must be in {1, 2, 4, 8, 16, 32, 64, 128}.
//
// The result is used by the dispatch_fill()/dispatch_fill_async() overloads
// in this file to choose between the fill-buffer path and the copy()-based
// default.
template<class Iterator>
struct is_valid_fill_buffer_iterator :
public mpl::and_<
is_buffer_iterator<Iterator>,
mpl::contains<
// value_type sizes (in bytes) accepted for the fill pattern
mpl::vector<
mpl::int_<1>,
mpl::int_<2>,
mpl::int_<4>,
mpl::int_<8>,
mpl::int_<16>,
mpl::int_<32>,
mpl::int_<64>,
mpl::int_<128>
>,
// size of the element type the iterator refers to
mpl::int_<
sizeof(typename std::iterator_traits<Iterator>::value_type)
>
>
>::type { };
// discard_iterator is explicitly marked as not fillable through
// clEnqueueFillBuffer(), so fills that target it are routed to the
// copy()-based default overloads.
template<>
struct is_valid_fill_buffer_iterator<discard_iterator>
    : boost::false_type
{
};
// Overload selected for iterators whose range can be filled with
// clEnqueueFillBuffer() (see is_valid_fill_buffer_iterator).
//
// Writes value, converted to the iterator's value_type, into
// [first, first + count). Empty ranges are ignored, and devices that do not
// report OpenCL 1.2 fall back to fill_with_copy().
template<class BufferIterator, class T>
inline void
dispatch_fill(BufferIterator first,
              size_t count,
              const T &value,
              command_queue &queue,
              typename boost::enable_if<
                  is_valid_fill_buffer_iterator<BufferIterator>
              >::type* = 0)
{
    typedef typename std::iterator_traits<BufferIterator>::value_type element_type;

    if(count == 0){
        // empty range: nothing to write
        return;
    }

    // enqueue_fill_buffer() requires OpenCL 1.2 support on the device
    if(!queue.check_device_version(1, 2)){
        fill_with_copy(first, count, value, queue);
        return;
    }

    element_type fill_value = static_cast<element_type>(value);
    const size_t element_size = sizeof(element_type);
    const size_t byte_offset =
        static_cast<size_t>(first.get_index()) * element_size;

    if(count != 1){
        queue.enqueue_fill_buffer(
            first.get_buffer(),
            &fill_value,
            element_size,
            byte_offset,
            count * element_size
        );
        return;
    }

    // a single value is written with clEnqueueWriteBuffer() directly. this
    // is potentially more efficient and also works around a bug in the
    // intel opencl driver.
    queue.enqueue_write_buffer(
        first.get_buffer(),
        byte_offset,
        element_size,
        &fill_value
    );
}
// Asynchronous counterpart of dispatch_fill() for iterators whose range can
// be filled with clEnqueueFillBuffer() (see is_valid_fill_buffer_iterator).
//
// Fills [first, first + count) with value, converted to the iterator's
// value_type, and returns a future wrapping the event of the enqueued fill.
//
// An empty range enqueues nothing and yields a default-constructed future.
// This mirrors the count == 0 guard in the synchronous dispatch_fill() and
// in the svm_ptr<T> overload of dispatch_fill_async(), so that calling this
// function directly never issues a zero-byte fill request.
//
// Devices that do not report OpenCL 1.2 fall back to fill_async_with_copy().
template<class BufferIterator, class T>
inline future<void>
dispatch_fill_async(BufferIterator first,
                    size_t count,
                    const T &value,
                    command_queue &queue,
                    typename boost::enable_if<
                        is_valid_fill_buffer_iterator<BufferIterator>
                    >::type* = 0)
{
    typedef typename std::iterator_traits<BufferIterator>::value_type value_type;

    if(count == 0){
        // nothing to do
        return future<void>();
    }

    // check if the device supports OpenCL 1.2 (required for enqueue_fill_buffer)
    if(!queue.check_device_version(1, 2)){
        return fill_async_with_copy(first, count, value, queue);
    }

    // the pattern lives on the stack; the OpenCL specification allows the
    // pattern memory to be reused once clEnqueueFillBuffer() has returned.
    value_type pattern = static_cast<value_type>(value);
    size_t offset = static_cast<size_t>(first.get_index());

    event event_ =
        queue.enqueue_fill_buffer(first.get_buffer(),
                                  &pattern,
                                  sizeof(value_type),
                                  offset * sizeof(value_type),
                                  count * sizeof(value_type));

    return future<void>(event_);
}
#ifdef CL_VERSION_2_0
// Overload for svm_ptr<T>: fills [first, first + count) with value through
// command_queue::enqueue_svm_fill(). Empty ranges are skipped.
template<class T>
inline void dispatch_fill(svm_ptr<T> first,
                          size_t count,
                          const T &value,
                          command_queue &queue)
{
    if(count != 0){
        const size_t bytes_to_fill = count * sizeof(T);

        queue.enqueue_svm_fill(first.get(), &value, sizeof(T), bytes_to_fill);
    }
}
// Asynchronous overload for svm_ptr<T>: enqueues an SVM fill of
// [first, first + count) with value and returns a future wrapping the
// resulting event. An empty range returns a default-constructed future
// without enqueuing anything.
template<class T>
inline future<void> dispatch_fill_async(svm_ptr<T> first,
                                        size_t count,
                                        const T &value,
                                        command_queue &queue)
{
    if(count != 0){
        const size_t bytes_to_fill = count * sizeof(T);

        event fill_event = queue.enqueue_svm_fill(
            first.get(), &value, sizeof(T), bytes_to_fill
        );
        return future<void>(fill_event);
    }

    return future<void>();
}
#endif // CL_VERSION_2_0
// Default implementation: chosen when BufferIterator does not satisfy
// is_valid_fill_buffer_iterator. Fills [first, first + count) with value
// via fill_with_copy().
template<class BufferIterator, class T>
inline void
dispatch_fill(BufferIterator first,
              size_t count,
              const T &value,
              command_queue &queue,
              typename boost::disable_if<
                  is_valid_fill_buffer_iterator<BufferIterator>
              >::type* = 0)
{
    return fill_with_copy(first, count, value, queue);
}
// Default asynchronous implementation: chosen when BufferIterator does not
// satisfy is_valid_fill_buffer_iterator. Fills [first, first + count) with
// value via fill_async_with_copy() and returns its future.
template<class BufferIterator, class T>
inline future<void>
dispatch_fill_async(BufferIterator first,
                    size_t count,
                    const T &value,
                    command_queue &queue,
                    typename boost::disable_if<
                        is_valid_fill_buffer_iterator<BufferIterator>
                    >::type* = 0)
{
    future<void> pending = fill_async_with_copy(first, count, value, queue);
    return pending;
}
#else
// Used when the OpenCL headers do not define CL_VERSION_1_2: every fill of
// [first, first + count) goes through fill_with_copy().
template<class BufferIterator, class T>
inline void dispatch_fill(BufferIterator first,
                          size_t count,
                          const T &value,
                          command_queue &queue)
{
    return fill_with_copy(first, count, value, queue);
}
// Used when the OpenCL headers do not define CL_VERSION_1_2: every
// asynchronous fill of [first, first + count) goes through
// fill_async_with_copy(), whose future is returned.
template<class BufferIterator, class T>
inline future<void> dispatch_fill_async(BufferIterator first,
                                        size_t count,
                                        const T &value,
                                        command_queue &queue)
{
    future<void> pending = fill_async_with_copy(first, count, value, queue);
    return pending;
}
#endif // !defined(CL_VERSION_1_2)
} // end detail namespace
/// Fills the range [\p first, \p last) with \p value.
///
/// An empty range is a no-op: nothing is enqueued on \p queue.
///
/// \param first iterator to the first element in the range to fill
/// \param last iterator one past the last element in the range to fill
/// \param value value to copy to each element
/// \param queue command queue to perform the operation
///
/// For example, to fill a vector on the device with sevens:
/// \code
/// // vector on the device
/// boost::compute::vector<int> vec(10, context);
///
/// // fill vector with sevens
/// boost::compute::fill(vec.begin(), vec.end(), 7, queue);
/// \endcode
///
/// \see boost::compute::fill_n()
template<class BufferIterator, class T>
inline void fill(BufferIterator first,
                 BufferIterator last,
                 const T &value,
                 command_queue &queue = system::default_queue())
{
    const size_t element_count = detail::iterator_range_size(first, last);

    // only dispatch when there is at least one element to write
    if(element_count != 0){
        detail::dispatch_fill(first, element_count, value, queue);
    }
}
/// Asynchronously fills the range [\p first, \p last) with \p value.
///
/// \param first iterator to the first element in the range to fill
/// \param last iterator one past the last element in the range to fill
/// \param value value to copy to each element
/// \param queue command queue to perform the operation
///
/// \return a future for the fill operation; for an empty range nothing is
///         enqueued and a default-constructed future is returned
///
/// \see boost::compute::fill()
template<class BufferIterator, class T>
inline future<void> fill_async(BufferIterator first,
                               BufferIterator last,
                               const T &value,
                               command_queue &queue = system::default_queue())
{
    const size_t element_count = detail::iterator_range_size(first, last);

    if(element_count != 0){
        return detail::dispatch_fill_async(first, element_count, value, queue);
    }

    // empty range: nothing to enqueue
    return future<void>();
}
} // end compute namespace
} // end boost namespace
#endif // BOOST_COMPUTE_ALGORITHM_FILL_HPP