// -*- C++ -*-
//===------------------------------- simd ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef _LIBCPP_EXPERIMENTAL_SIMD
#define _LIBCPP_EXPERIMENTAL_SIMD

/*
    experimental/simd synopsis

namespace std::experimental {

inline namespace parallelism_v2 {

namespace simd_abi {

struct scalar {};
template <int N> struct fixed_size {};
template <typename T> inline constexpr int max_fixed_size = implementation-defined;
template <typename T> using compatible = implementation-defined;
template <typename T> using native = implementation-defined;

} // simd_abi

struct element_aligned_tag {};
struct vector_aligned_tag {};
template <size_t> struct overaligned_tag {};
inline constexpr element_aligned_tag element_aligned{};
inline constexpr vector_aligned_tag vector_aligned{};
template <size_t N> inline constexpr overaligned_tag<N> overaligned{};

// traits [simd.traits]
template <class T> struct is_abi_tag;
template <class T> inline constexpr bool is_abi_tag_v = is_abi_tag<T>::value;

template <class T> struct is_simd;
template <class T> inline constexpr bool is_simd_v = is_simd<T>::value;

template <class T> struct is_simd_mask;
template <class T> inline constexpr bool is_simd_mask_v = is_simd_mask<T>::value;

template <class T> struct is_simd_flag_type;
template <class T> inline constexpr bool is_simd_flag_type_v = is_simd_flag_type<T>::value;

template <class T, size_t N> struct abi_for_size { using type = see below; };
template <class T, size_t N> using abi_for_size_t = typename abi_for_size<T, N>::type;

template <class T, class Abi = simd_abi::compatible<T>> struct simd_size;
template <class T, class Abi = simd_abi::compatible<T>>
inline constexpr size_t simd_size_v = simd_size<T, Abi>::value;

template <class T, class U = typename T::value_type> struct memory_alignment;
template <class T, class U = typename T::value_type>
inline constexpr size_t memory_alignment_v = memory_alignment<T, U>::value;

// class template simd [simd.class]
template > class simd;
template  using native_simd = simd>;
template  using fixed_size_simd = simd>;

// class template simd_mask [simd.mask.class]
template > class simd_mask;
template  using native_simd_mask = simd_mask>;
template  using fixed_size_simd_mask = simd_mask>;

// casts [simd.casts]
template  see below simd_cast(const simd&);
template  see below static_simd_cast(const simd&);

template 
fixed_size_simd> to_fixed_size(const simd&) noexcept;
template 
fixed_size_simd_mask> to_fixed_size(const simd_mask&) noexcept;
template  native_simd to_native(const fixed_size_simd&) noexcept;
template 
native_simd_mask to_native(const fixed_size_simd_mask> &) noexcept;
template  simd to_compatible(const fixed_size_simd&) noexcept;
template  simd_mask to_compatible(const fixed_size_simd_mask&) noexcept;

template 
tuple>...> split(const simd&);
template 
tuple>...> split(const simd_mask&);
template 
array / V::size()> split(
const simd&);
template 
array / V::size()> split(
const simd_mask&);

template 
simd + ...)>> concat(const simd&...);
template 
simd_mask + ...)>> concat(const simd_mask&...);

// reductions [simd.mask.reductions]
template  bool all_of(const simd_mask&) noexcept;
template  bool any_of(const simd_mask&) noexcept;
template  bool none_of(const simd_mask&) noexcept;
template  bool some_of(const simd_mask&) noexcept;
template  int popcount(const simd_mask&) noexcept;
template  int find_first_set(const simd_mask&);
template  int find_last_set(const simd_mask&);

bool all_of(see below) noexcept;
bool any_of(see below) noexcept;
bool none_of(see below) noexcept;
bool some_of(see below) noexcept;
int popcount(see below) noexcept;
int find_first_set(see below) noexcept;
int find_last_set(see below) noexcept;

// masked assignment [simd.whereexpr]
template  class const_where_expression;
template  class where_expression;

// masked assignment [simd.mask.where]
template  struct nodeduce { using type = T; }; // exposition only

template  using nodeduce_t = typename nodeduce::type; // exposition only

template 
where_expression, simd>
where(const typename simd::mask_type&, simd&) noexcept;

template 
const_where_expression, const simd>
where(const typename simd::mask_type&, const simd&) noexcept;

template 
where_expression, simd_mask>
where(const nodeduce_t>&, simd_mask&) noexcept;

template 
const_where_expression, const simd_mask>
where(const nodeduce_t>&, const simd_mask&) noexcept;

template  where_expression where(see below k, T& d) noexcept;

template 
const_where_expression where(see below k, const T& d) noexcept;

// reductions [simd.reductions]
template >
T reduce(const simd&, BinaryOperation = BinaryOperation());

template 
typename V::value_type reduce(const const_where_expression& x,
typename V::value_type neutral_element, BinaryOperation binary_op);

template 
typename V::value_type reduce(const const_where_expression& x, plus<> binary_op = plus<>());

template 
typename V::value_type reduce(const const_where_expression& x, multiplies<> binary_op);

template 
typename V::value_type reduce(const const_where_expression& x, bit_and<> binary_op);

template 
typename V::value_type reduce(const const_where_expression& x, bit_or<> binary_op);

template 
typename V::value_type reduce(const const_where_expression& x, bit_xor<> binary_op);

template  T hmin(const simd&);
template  T hmin(const const_where_expression&);
template  T hmax(const simd&);
template  T hmax(const const_where_expression&);

// algorithms [simd.alg]
template  simd min(const simd&, const simd&) noexcept;

template  simd max(const simd&, const simd&) noexcept;

template 
std::pair, simd> minmax(const simd&, const simd&) noexcept;

template 
simd clamp(const simd& v, const simd& lo, const simd& hi);

// [simd.whereexpr]
template 
class const_where_expression {
  const M& mask; // exposition only
  T& data; // exposition only
public:
  const_where_expression(const const_where_expression&) = delete;
  const_where_expression& operator=(const const_where_expression&) = delete;
  remove_const_t operator-() const &&;
  template  void copy_to(U* mem, Flags f) const &&;
};

template 
class where_expression : public const_where_expression {
public:
  where_expression(const where_expression&) = delete;
  where_expression& operator=(const where_expression&) = delete;
  template  void operator=(U&& x);
  template  void operator+=(U&& x);
  template  void operator-=(U&& x);
  template  void operator*=(U&& x);
  template  void operator/=(U&& x);
  template  void operator%=(U&& x);
  template  void operator&=(U&& x);
  template  void operator|=(U&& x);
  template  void operator^=(U&& x);
  template  void operator<<=(U&& x);
  template  void operator>>=(U&& x);
  void operator++();
  void operator++(int);
  void operator--();
  void operator--(int);
  template  void copy_from(const U* mem, Flags);
};

// [simd.class]
template  class simd {
public:
  using value_type = T;
  using reference = see below;
  using mask_type = simd_mask;

  using abi_type = Abi;
  static constexpr size_t size() noexcept;
  simd() = default;

  // implicit type conversion constructor
  template  simd(const simd>&);

  // implicit broadcast constructor (see below for constraints)
  template  simd(U&& value);

  // generator constructor (see below for constraints)
  template  explicit simd(G&& gen);

  // load constructor
  template  simd(const U* mem, Flags f);

  // loads [simd.load]
  template  void copy_from(const U* mem, Flags f);

  // stores [simd.store]
  template  void copy_to(U* mem, Flags f) const;

  // scalar access [simd.subscr]
  reference operator[](size_t);
  value_type operator[](size_t) const;

  // unary operators [simd.unary]
  simd& operator++();
  simd operator++(int);
  simd& operator--();
  simd operator--(int);
  mask_type operator!() const;
  simd operator~() const; // see below
  simd operator+() const;
  simd operator-() const;

  // binary operators [simd.binary]
  friend simd operator+ (const simd&, const simd&);
  friend simd operator- (const simd&, const simd&);
  friend simd operator* (const simd&, const simd&);
  friend simd operator/ (const simd&, const simd&);
  friend simd operator% (const simd&, const simd&);
  friend simd operator& (const simd&, const simd&);
  friend simd operator| (const simd&, const simd&);
  friend simd operator^ (const simd&, const simd&);
  friend simd operator<<(const simd&, const simd&);
  friend simd operator>>(const simd&, const simd&);
  friend simd operator<<(const simd&, int);
  friend simd operator>>(const simd&, int);

  // compound assignment [simd.cassign]
  friend simd& operator+= (simd&, const simd&);
  friend simd& operator-= (simd&, const simd&);
  friend simd& operator*= (simd&, const simd&);
  friend simd& operator/= (simd&, const simd&);
  friend simd& operator%= (simd&, const simd&);

  friend simd& operator&= (simd&, const simd&);
  friend simd& operator|= (simd&, const simd&);
  friend simd& operator^= (simd&, const simd&);
  friend simd& operator<<=(simd&, const simd&);
  friend simd& operator>>=(simd&, const simd&);
  friend simd& operator<<=(simd&, int);
  friend simd& operator>>=(simd&, int);

  // compares [simd.comparison]
  friend mask_type operator==(const simd&, const simd&);
  friend mask_type operator!=(const simd&, const simd&);
  friend mask_type operator>=(const simd&, const simd&);
  friend mask_type operator<=(const simd&, const simd&);
  friend mask_type operator> (const simd&, const simd&);
  friend mask_type operator< (const simd&, const simd&);
};

// [simd.math]
template  using scharv = simd; // exposition only
template  using shortv = simd; // exposition only
template  using intv = simd; // exposition only
template  using longv = simd; // exposition only
template  using llongv = simd; // exposition only
template  using floatv = simd; // exposition only
template  using doublev = simd; // exposition only
template  using ldoublev = simd; // exposition only
template  using samesize = fixed_size_simd; // exposition only

template  floatv acos(floatv x);
template  doublev acos(doublev x);
template  ldoublev acos(ldoublev x);

template  floatv asin(floatv x);
template  doublev asin(doublev x);
template  ldoublev asin(ldoublev x);

template  floatv atan(floatv x);
template  doublev atan(doublev x);
template  ldoublev atan(ldoublev x);

template  floatv atan2(floatv y, floatv x);
template  doublev atan2(doublev y, doublev x);
template  ldoublev atan2(ldoublev y, ldoublev x);

template  floatv cos(floatv x);
template  doublev cos(doublev x);
template  ldoublev cos(ldoublev x);

template  floatv sin(floatv x);
template  doublev sin(doublev x);
template  ldoublev sin(ldoublev x);

template  floatv tan(floatv x);
template  doublev tan(doublev x);
template  ldoublev tan(ldoublev x);

template  floatv acosh(floatv x);
template  doublev acosh(doublev x);
template  ldoublev acosh(ldoublev x);

template  floatv asinh(floatv x);
template  doublev asinh(doublev x);
template  ldoublev asinh(ldoublev x);

template  floatv atanh(floatv x);
template  doublev atanh(doublev x);
template  ldoublev atanh(ldoublev x);

template  floatv cosh(floatv x);
template  doublev cosh(doublev x);
template  ldoublev cosh(ldoublev x);

template  floatv sinh(floatv x);
template  doublev sinh(doublev x);
template  ldoublev sinh(ldoublev x);

template  floatv tanh(floatv x);
template  doublev tanh(doublev x);
template  ldoublev tanh(ldoublev x);

template  floatv exp(floatv x);
template  doublev exp(doublev x);
template  ldoublev exp(ldoublev x);

template  floatv exp2(floatv x);
template  doublev exp2(doublev x);
template  ldoublev exp2(ldoublev x);

template  floatv expm1(floatv x);
template  doublev expm1(doublev x);
template  ldoublev expm1(ldoublev x);

template  floatv frexp(floatv value, samesize>* exp);
template  doublev frexp(doublev value, samesize>* exp);
template  ldoublev frexp(ldoublev value, samesize>* exp);

template  samesize> ilogb(floatv x);
template  samesize> ilogb(doublev x);
template  samesize> ilogb(ldoublev x);

template  floatv ldexp(floatv x, samesize> exp);
template  doublev ldexp(doublev x, samesize> exp);
template  ldoublev ldexp(ldoublev x, samesize> exp);

template  floatv log(floatv x);
template  doublev log(doublev x);
template  ldoublev log(ldoublev x);

template  floatv log10(floatv x);
template  doublev log10(doublev x);
template  ldoublev log10(ldoublev x);

template  floatv log1p(floatv x);
template  doublev log1p(doublev x);
template  ldoublev log1p(ldoublev x);

template  floatv log2(floatv x);
template  doublev log2(doublev x);
template  ldoublev log2(ldoublev x);

template  floatv logb(floatv x);
template  doublev logb(doublev x);
template  ldoublev logb(ldoublev x);

template  floatv modf(floatv value, floatv* iptr);
template  doublev modf(doublev value, doublev* iptr);
template  ldoublev modf(ldoublev value, ldoublev* iptr);

template  floatv scalbn(floatv x, samesize> n);
template  doublev scalbn(doublev x, samesize> n);
template  ldoublev scalbn(ldoublev x, samesize> n);
template  floatv scalbln(floatv x, samesize> n);
template  doublev scalbln(doublev x, samesize> n);
template  ldoublev scalbln(ldoublev x, samesize> n);

template  floatv cbrt(floatv x);
template  doublev cbrt(doublev x);
template  ldoublev cbrt(ldoublev x);

template  scharv abs(scharv j);
template  shortv abs(shortv j);
template  intv abs(intv j);
template  longv abs(longv j);
template  llongv abs(llongv j);
template  floatv abs(floatv j);
template  doublev abs(doublev j);
template  ldoublev abs(ldoublev j);

template  floatv hypot(floatv x, floatv y);
template  doublev hypot(doublev x, doublev y);
template <class Abi> ldoublev<Abi> hypot(ldoublev<Abi> x, ldoublev<Abi> y);
template  floatv hypot(floatv x, floatv y, floatv z);
template  doublev hypot(doublev x, doublev y, doublev z);
template  ldoublev hypot(ldoublev x, ldoublev y, ldoublev z);

template  floatv pow(floatv x, floatv y);
template  doublev pow(doublev x, doublev y);
template  ldoublev pow(ldoublev x, ldoublev y);

template  floatv sqrt(floatv x);
template  doublev sqrt(doublev x);
template  ldoublev sqrt(ldoublev x);

template  floatv erf(floatv x);
template  doublev erf(doublev x);
template  ldoublev erf(ldoublev x);
template  floatv erfc(floatv x);
template  doublev erfc(doublev x);
template  ldoublev erfc(ldoublev x);

template  floatv lgamma(floatv x);
template  doublev lgamma(doublev x);
template  ldoublev lgamma(ldoublev x);

template  floatv tgamma(floatv x);
template  doublev tgamma(doublev x);
template  ldoublev tgamma(ldoublev x);

template  floatv ceil(floatv x);
template  doublev ceil(doublev x);
template  ldoublev ceil(ldoublev x);

template  floatv floor(floatv x);
template  doublev floor(doublev x);
template  ldoublev floor(ldoublev x);

template  floatv nearbyint(floatv x);
template  doublev nearbyint(doublev x);
template  ldoublev nearbyint(ldoublev x);

template  floatv rint(floatv x);
template  doublev rint(doublev x);
template  ldoublev rint(ldoublev x);

template  samesize> lrint(floatv x);
template  samesize> lrint(doublev x);
template  samesize> lrint(ldoublev x);
template  samesize> llrint(floatv x);
template  samesize> llrint(doublev x);
template  samesize> llrint(ldoublev x);

template  floatv round(floatv x);
template  doublev round(doublev x);
template  ldoublev round(ldoublev x);
template  samesize> lround(floatv x);
template  samesize> lround(doublev x);
template  samesize> lround(ldoublev x);
template  samesize> llround(floatv x);
template  samesize> llround(doublev x);
template  samesize> llround(ldoublev x);

template  floatv trunc(floatv x);
template  doublev trunc(doublev x);
template  ldoublev trunc(ldoublev x);

template  floatv fmod(floatv x, floatv y);
template  doublev fmod(doublev x, doublev y);
template  ldoublev fmod(ldoublev x, ldoublev y);

template  floatv remainder(floatv x, floatv y);
template  doublev remainder(doublev x, doublev y);
template  ldoublev remainder(ldoublev x, ldoublev y);

template  floatv remquo(floatv x, floatv y, samesize>* quo);
template  doublev remquo(doublev x, doublev y, samesize>* quo);
template  ldoublev remquo(ldoublev x, ldoublev y, samesize>* quo);

template  floatv copysign(floatv x, floatv y);
template  doublev copysign(doublev x, doublev y);
template  ldoublev copysign(ldoublev x, ldoublev y);

template  doublev nan(const char* tagp);
template  floatv nanf(const char* tagp);
template  ldoublev nanl(const char* tagp);

template  floatv nextafter(floatv x, floatv y);
template  doublev nextafter(doublev x, doublev y);
template  ldoublev nextafter(ldoublev x, ldoublev y);

template  floatv nexttoward(floatv x, ldoublev y);
template  doublev nexttoward(doublev x, ldoublev y);
template  ldoublev nexttoward(ldoublev x, ldoublev y);

template  floatv fdim(floatv x, floatv y);
template  doublev fdim(doublev x, doublev y);
template  ldoublev fdim(ldoublev x, ldoublev y);

template  floatv fmax(floatv x, floatv y);
template  doublev fmax(doublev x, doublev y);
template  ldoublev fmax(ldoublev x, ldoublev y);

template  floatv fmin(floatv x, floatv y);
template  doublev fmin(doublev x, doublev y);
template  ldoublev fmin(ldoublev x, ldoublev y);

template  floatv fma(floatv x, floatv y, floatv z);
template  doublev fma(doublev x, doublev y, doublev z);
template  ldoublev fma(ldoublev x, ldoublev y, ldoublev z);

template  samesize> fpclassify(floatv x);
template  samesize> fpclassify(doublev x);
template  samesize> fpclassify(ldoublev x);

template  simd_mask isfinite(floatv x);
template  simd_mask isfinite(doublev x);
template  simd_mask isfinite(ldoublev x);

template  simd_mask isinf(floatv x);
template  simd_mask isinf(doublev x);
template  simd_mask isinf(ldoublev x);

template  simd_mask isnan(floatv x);
template  simd_mask isnan(doublev x);
template  simd_mask isnan(ldoublev x);

template  simd_mask isnormal(floatv x);
template  simd_mask isnormal(doublev x);
template  simd_mask isnormal(ldoublev x);

template  simd_mask signbit(floatv x);
template  simd_mask signbit(doublev x);
template  simd_mask signbit(ldoublev x);

template  simd_mask isgreater(floatv x, floatv y);
template  simd_mask isgreater(doublev x, doublev y);
template  simd_mask isgreater(ldoublev x, ldoublev y);

template  simd_mask isgreaterequal(floatv x, floatv y);
template  simd_mask isgreaterequal(doublev x, doublev y);
template  simd_mask isgreaterequal(ldoublev x, ldoublev y);

template  simd_mask isless(floatv x, floatv y);
template  simd_mask isless(doublev x, doublev y);
template  simd_mask isless(ldoublev x, ldoublev y);

template  simd_mask islessequal(floatv x, floatv y);
template  simd_mask islessequal(doublev x, doublev y);
template  simd_mask islessequal(ldoublev x, ldoublev y);

template  simd_mask islessgreater(floatv x, floatv y);
template  simd_mask islessgreater(doublev x, doublev y);
template  simd_mask islessgreater(ldoublev x, ldoublev y);

template  simd_mask isunordered(floatv x, floatv y);
template  simd_mask isunordered(doublev x, doublev y);
template  simd_mask isunordered(ldoublev x, ldoublev y);

template  struct simd_div_t { V quot, rem; };
template  simd_div_t> div(scharv numer, scharv denom);
template  simd_div_t> div(shortv numer, shortv denom);
template  simd_div_t> div(intv numer, intv denom);
template  simd_div_t> div(longv numer, longv denom);
template  simd_div_t> div(llongv numer, llongv denom);

// [simd.mask.class]
template 
class simd_mask {
public:
  using value_type = bool;
  using reference = see below;
  using simd_type = simd;
  using abi_type = Abi;
  static constexpr size_t size() noexcept;
  simd_mask() = default;

  // broadcast constructor
  explicit simd_mask(value_type) noexcept;

  // implicit type conversion constructor
  template  simd_mask(const simd_mask>&) noexcept;

  // load constructor
  template  simd_mask(const value_type* mem, Flags);

  // loads [simd.mask.copy]
  template  void copy_from(const value_type* mem, Flags);
  template  void copy_to(value_type* mem, Flags) const;

  // scalar access [simd.mask.subscr]
  reference operator[](size_t);
  value_type operator[](size_t) const;

  // unary operators [simd.mask.unary]
  simd_mask operator!() const noexcept;

  // simd_mask binary operators [simd.mask.binary]
  friend simd_mask operator&&(const simd_mask&, const simd_mask&) noexcept;
  friend simd_mask operator||(const simd_mask&, const simd_mask&) noexcept;
  friend simd_mask operator& (const simd_mask&, const simd_mask&) noexcept;
  friend simd_mask operator| (const simd_mask&, const simd_mask&) noexcept;
  friend simd_mask operator^ (const simd_mask&, const simd_mask&) noexcept;

  // simd_mask compound assignment [simd.mask.cassign]
  friend simd_mask& operator&=(simd_mask&, const simd_mask&) noexcept;
  friend simd_mask& operator|=(simd_mask&, const simd_mask&) noexcept;
  friend simd_mask& operator^=(simd_mask&, const simd_mask&) noexcept;

  // simd_mask compares [simd.mask.comparison]
  friend simd_mask operator==(const simd_mask&, const simd_mask&) noexcept;
  friend simd_mask operator!=(const simd_mask&, const simd_mask&) noexcept;
};

} // parallelism_v2
} // std::experimental

*/

#include 
#include 
#include 
#include 
#include 

#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif

_LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL_SIMD

#if _LIBCPP_STD_VER >= 17

// Selects how the elements of a simd / simd_mask object are stored. Used as
// the first template argument of __simd_abi, which in turn picks the matching
// __simd_storage specialization.
enum class _StorageKind {
  _Scalar, // a single element held directly
  _Array,  // elements held in a std::array
  _VecExt, // elements held in a compiler vector-extension type
};

// ABI tag: pairs a storage strategy (__kind) with an element count (_Np).
// The type is empty; it is only used to select template specializations.
template <_StorageKind __kind, int _Np>
struct __simd_abi {};

// Primary template of the per-ABI element storage. It is intentionally empty:
// storage and the __get / __set accessors are supplied by the partial
// specializations on __simd_abi.
template <class _Tp, class _Abi>
class __simd_storage {};

// Storage for the _Array kind: __num_element values of _Tp kept in a
// std::array.
template <class _Tp, int __num_element>
class __simd_storage<_Tp, __simd_abi<_StorageKind::_Array, __num_element>> {
  std::array<_Tp, __num_element> __storage_;

  // simd and simd_mask are declared with the `class` key elsewhere in this
  // header; use the same key here to avoid mismatched-tag diagnostics.
  template <class, class>
  friend class simd;

  template <class, class>
  friend class simd_mask;

public:
  // Returns a copy of the element at __index. No bounds check is performed.
  _Tp __get(size_t __index) const noexcept { return __storage_[__index]; }

  // Overwrites the element at __index with __val. No bounds check is
  // performed.
  void __set(size_t __index, _Tp __val) noexcept {
    __storage_[__index] = __val;
  }
};

// Storage for the _Scalar kind: exactly one element, held directly.
template <class _Tp>
class __simd_storage<_Tp, __simd_abi<_StorageKind::_Scalar, 1>> {
  _Tp __storage_;

  // simd and simd_mask are declared with the `class` key elsewhere in this
  // header; use the same key here to avoid mismatched-tag diagnostics.
  template <class, class>
  friend class simd;

  template <class, class>
  friend class simd_mask;

public:
  // Returns the stored element. __index is applied to the address of the
  // single element, so 0 is the only index that refers to it.
  _Tp __get(size_t __index) const noexcept { return (&__storage_)[__index]; }

  // Overwrites the stored element. As with __get, only index 0 is valid.
  void __set(size_t __index, _Tp __val) noexcept {
    (&__storage_)[__index] = __val;
  }
};

#ifndef _LIBCPP_HAS_NO_VECTOR_EXTENSION

// Returns the largest power of two that is <= __val (0 for an input of 0).
constexpr size_t __floor_pow_of_2(size_t __val) {
  // `x & (x - 1)` clears the lowest set bit; repeat until at most one bit is
  // left, which is then the highest bit of the original value.
  while ((__val & (__val - 1)) != 0) {
    __val &= __val - 1;
  }
  return __val;
}

// Returns the smallest power of two that is >= __val, for __val >= 1.
constexpr size_t __ceil_pow_of_2(size_t __val) {
  if (__val == 1) {
    return 1;
  }
  // Doubling the floor of (__val - 1) rounds up without overshooting exact
  // powers of two.
  return __floor_pow_of_2(__val - 1) << 1;
}

template 
struct __vec_ext_traits {
#if !defined(_LIBCPP_COMPILER_CLANG)
  typedef _Tp type __attribute__((vector_size(__ceil_pow_of_2(__bytes))));
#endif
};

// Clang only: the primary __vec_ext_traits has no `type` member under Clang,
// so an explicit specialization is generated for every supported
// (element type, element count) pair.
#if defined(_LIBCPP_COMPILER_CLANG)
// Defines __vec_ext_traits<_TYPE, sizeof(_TYPE) * _NUM_ELEMENT> whose `type`
// is a vector-extension type of exactly that many bytes (no rounding).
#define _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, _NUM_ELEMENT)                        \
  template <>                                                                  \
  struct __vec_ext_traits<_TYPE, sizeof(_TYPE) * _NUM_ELEMENT> {               \
    using type =                                                               \
        _TYPE __attribute__((vector_size(sizeof(_TYPE) * _NUM_ELEMENT)));      \
  }

// Emits the specialization above for element counts 1 through 32.
#define _LIBCPP_SPECIALIZE_VEC_EXT_32(_TYPE)                                   \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 1);                                        \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 2);                                        \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 3);                                        \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 4);                                        \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 5);                                        \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 6);                                        \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 7);                                        \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 8);                                        \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 9);                                        \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 10);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 11);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 12);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 13);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 14);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 15);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 16);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 17);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 18);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 19);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 20);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 21);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 22);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 23);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 24);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 25);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 26);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 27);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 28);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 29);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 30);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 31);                                       \
  _LIBCPP_SPECIALIZE_VEC_EXT(_TYPE, 32);

// Element types for which the 32 specializations are generated.
_LIBCPP_SPECIALIZE_VEC_EXT_32(char);
_LIBCPP_SPECIALIZE_VEC_EXT_32(char16_t);
_LIBCPP_SPECIALIZE_VEC_EXT_32(char32_t);
_LIBCPP_SPECIALIZE_VEC_EXT_32(wchar_t);
_LIBCPP_SPECIALIZE_VEC_EXT_32(signed char);
_LIBCPP_SPECIALIZE_VEC_EXT_32(signed short);
_LIBCPP_SPECIALIZE_VEC_EXT_32(signed int);
_LIBCPP_SPECIALIZE_VEC_EXT_32(signed long);
_LIBCPP_SPECIALIZE_VEC_EXT_32(signed long long);
_LIBCPP_SPECIALIZE_VEC_EXT_32(unsigned char);
_LIBCPP_SPECIALIZE_VEC_EXT_32(unsigned short);
_LIBCPP_SPECIALIZE_VEC_EXT_32(unsigned int);
_LIBCPP_SPECIALIZE_VEC_EXT_32(unsigned long);
_LIBCPP_SPECIALIZE_VEC_EXT_32(unsigned long long);
_LIBCPP_SPECIALIZE_VEC_EXT_32(float);
_LIBCPP_SPECIALIZE_VEC_EXT_32(double);
_LIBCPP_SPECIALIZE_VEC_EXT_32(long double);

// The generator macros are local helpers; remove them once used.
#undef _LIBCPP_SPECIALIZE_VEC_EXT_32
#undef _LIBCPP_SPECIALIZE_VEC_EXT
#endif

// Storage for the _VecExt kind: elements are kept in the vector-extension
// type selected by __vec_ext_traits for sizeof(_Tp) * __num_element bytes.
template <class _Tp, int __num_element>
class __simd_storage<_Tp, __simd_abi<_StorageKind::_VecExt, __num_element>> {
  using _StorageType =
      typename __vec_ext_traits<_Tp, sizeof(_Tp) * __num_element>::type;

  _StorageType __storage_;

  // simd and simd_mask are declared with the `class` key elsewhere in this
  // header; use the same key here to avoid mismatched-tag diagnostics.
  template <class, class>
  friend class simd;

  template <class, class>
  friend class simd_mask;

public:
  // Returns a copy of the lane at __index. No bounds check is performed.
  _Tp __get(size_t __index) const noexcept { return __storage_[__index]; }

  // Overwrites the lane at __index with __val. No bounds check is performed.
  void __set(size_t __index, _Tp __val) noexcept {
    __storage_[__index] = __val;
  }
};

#endif // _LIBCPP_HAS_NO_VECTOR_EXTENSION

// Proxy that stands in for a reference to one element of a
// __simd_storage<_Tp, _Abi> object. Reads go through __get() and writes go
// through __set() on the referenced storage.
//
// Construction and copying are private, so only the befriended simd and
// simd_mask templates can create a proxy. Every mutating operator is
// rvalue-ref-qualified, so it can only be applied to a temporary proxy.
template <class _Vp, class _Tp, class _Abi>
class __simd_reference {
  // The value type exposed by the proxy must be the stored element type.
  static_assert(std::is_same<_Vp, _Tp>::value, "");

  // simd and simd_mask are declared with the `class` key elsewhere in this
  // header; use the same key here to avoid mismatched-tag diagnostics.
  template <class, class>
  friend class simd;

  template <class, class>
  friend class simd_mask;

  __simd_storage<_Tp, _Abi>* __ptr_; // storage that holds the element
  size_t __index_;                   // position of the element in *__ptr_

  __simd_reference(__simd_storage<_Tp, _Abi>* __ptr, size_t __index)
      : __ptr_(__ptr), __index_(__index) {}

  // Private: used internally to return the proxy by value from the operators.
  __simd_reference(const __simd_reference&) = default;

public:
  __simd_reference() = delete;
  __simd_reference& operator=(const __simd_reference&) = delete;

  // Reads the referenced element.
  operator _Vp() const { return __ptr_->__get(__index_); }

  // Writes __value to the referenced element and returns a copy of the proxy.
  __simd_reference operator=(_Vp __value) && {
    __ptr_->__set(__index_, __value);
    return *this;
  }

  // Pre-increment: stores element + 1 and returns the proxy.
  __simd_reference operator++() && {
    return std::move(*this) = __ptr_->__get(__index_) + 1;
  }

  // Post-increment: stores element + 1 and returns the previous value.
  _Vp operator++(int) && {
    auto __val = __ptr_->__get(__index_);
    __ptr_->__set(__index_, __val + 1);
    return __val;
  }

  // Pre-decrement: stores element - 1 and returns the proxy.
  __simd_reference operator--() && {
    return std::move(*this) = __ptr_->__get(__index_) - 1;
  }

  // Post-decrement: stores element - 1 and returns the previous value.
  _Vp operator--(int) && {
    auto __val = __ptr_->__get(__index_);
    __ptr_->__set(__index_, __val - 1);
    return __val;
  }

  // Compound assignments: each reads the element, applies the corresponding
  // binary operator with __value, and writes the result back through
  // operator=.
  __simd_reference operator+=(_Vp __value) && {
    return std::move(*this) = __ptr_->__get(__index_) + __value;
  }

  __simd_reference operator-=(_Vp __value) && {
    return std::move(*this) = __ptr_->__get(__index_) - __value;
  }

  __simd_reference operator*=(_Vp __value) && {
    return std::move(*this) = __ptr_->__get(__index_) * __value;
  }

  __simd_reference operator/=(_Vp __value) && {
    return std::move(*this) = __ptr_->__get(__index_) / __value;
  }

  __simd_reference operator%=(_Vp __value) && {
    return std::move(*this) = __ptr_->__get(__index_) % __value;
  }

  __simd_reference operator>>=(_Vp __value) && {
    return std::move(*this) = __ptr_->__get(__index_) >> __value;
  }

  __simd_reference operator<<=(_Vp __value) && {
    return std::move(*this) = __ptr_->__get(__index_) << __value;
  }

  __simd_reference operator&=(_Vp __value) && {
    return std::move(*this) = __ptr_->__get(__index_) & __value;
  }

  __simd_reference operator|=(_Vp __value) && {
    return std::move(*this) = __ptr_->__get(__index_) | __value;
  }

  __simd_reference operator^=(_Vp __value) && {
    return std::move(*this) = __ptr_->__get(__index_) ^ __value;
  }
};

// Preferred overload: participates only when `_To{from}` is well-formed for a
// `from` of type _From, i.e. when list-initializing a _To from a _From is not
// a narrowing conversion. The argument is used for deduction only.
template <class _To, class _From>
constexpr decltype(_To{std::declval<_From>()}, true)
__is_non_narrowing_convertible_impl(_From) {
  return true;
}

// Fallback: the ellipsis ranks below any other conversion, so this overload is
// chosen only when the one above was removed by SFINAE.
template <class _To>
constexpr bool __is_non_narrowing_convertible_impl(...) {
  return false;
}

// Returns true when both types are arithmetic and converting a _From to a _To
// is not narrowing.
template <class _From, class _To>
constexpr typename std::enable_if<std::is_arithmetic<_To>::value &&
                                      std::is_arithmetic<_From>::value,
                                  bool>::type
__is_non_narrowing_arithmetic_convertible() {
  return __is_non_narrowing_convertible_impl<_To>(_From{});
}

// Either type is not arithmetic: the answer is always false.
template <class _From, class _To>
constexpr typename std::enable_if<!(std::is_arithmetic<_To>::value &&
                                    std::is_arithmetic<_From>::value),
                                  bool>::type
__is_non_narrowing_arithmetic_convertible() {
  return false;
}

// Base case: the sum of no values is a value-initialized _Tp.
template <class _Tp>
constexpr _Tp __variadic_sum() {
  return _Tp{};
}

// Sums all arguments, converting each one to _Tp before adding it.
template <class _Tp, class _Up, class... _Args>
constexpr _Tp __variadic_sum(_Up __first, _Args... __rest) {
  return static_cast<_Tp>(__first) + __variadic_sum<_Tp>(__rest...);
}

// Identity metafunction. Writing a parameter as `typename __nodeduce<T>::type`
// places T in a non-deduced context, so it is not deduced from that argument.
template <class _Tp>
struct __nodeduce {
  using type = _Tp;
};

// Returns true when _Tp can be a simd element type: an arithmetic type that is
// neither const- nor volatile-qualified and is not bool.
template <class _Tp>
constexpr bool __vectorizable() {
  return std::is_arithmetic<_Tp>::value && !std::is_const<_Tp>::value &&
         !std::is_volatile<_Tp>::value && !std::is_same<_Tp, bool>::value;
}

_LIBCPP_END_NAMESPACE_EXPERIMENTAL_SIMD
_LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL_SIMD_ABI

using scalar = __simd_abi<_StorageKind::_Scalar, 1>;

template 
using fixed_size = __simd_abi<_StorageKind::_Array, _Np>;

template 
_LIBCPP_INLINE_VAR constexpr size_t max_fixed_size = 32;

template 
using compatible = fixed_size<16 / sizeof(_Tp)>;

#ifndef _LIBCPP_HAS_NO_VECTOR_EXTENSION
template 
using native = __simd_abi<_StorageKind::_VecExt,
                          _LIBCPP_NATIVE_SIMD_WIDTH_IN_BYTES / sizeof(_Tp)>;
#else
template 
using native =
    fixed_size<_Tp, _LIBCPP_NATIVE_SIMD_WIDTH_IN_BYTES / sizeof(_Tp)>;
#endif // _LIBCPP_HAS_NO_VECTOR_EXTENSION

_LIBCPP_END_NAMESPACE_EXPERIMENTAL_SIMD_ABI
_LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL_SIMD

template >
class simd;
template >
class simd_mask;

// Alignment flag types and the corresponding constant objects. All of them
// are recognized by is_simd_flag_type.
struct element_aligned_tag {};
struct vector_aligned_tag {};
// The size_t template argument carries the requested alignment.
template <size_t>
struct overaligned_tag {};
_LIBCPP_INLINE_VAR constexpr element_aligned_tag element_aligned{};
_LIBCPP_INLINE_VAR constexpr vector_aligned_tag vector_aligned{};
template <size_t _Np>
_LIBCPP_INLINE_VAR constexpr overaligned_tag<_Np> overaligned{};

// traits [simd.traits]
template 
struct is_abi_tag : std::integral_constant {};

template <_StorageKind __kind, int _Np>
struct is_abi_tag<__simd_abi<__kind, _Np>>
    : std::integral_constant {};

template 
struct is_simd : std::integral_constant {};

template 
struct is_simd> : std::integral_constant {};

template 
struct is_simd_mask : std::integral_constant {};

template 
struct is_simd_mask> : std::integral_constant {
};

template 
struct is_simd_flag_type : std::integral_constant {};

template <>
struct is_simd_flag_type
    : std::integral_constant {};

template <>
struct is_simd_flag_type
    : std::integral_constant {};

template 
struct is_simd_flag_type>
    : std::integral_constant {};

template 
_LIBCPP_INLINE_VAR constexpr bool is_abi_tag_v = is_abi_tag<_Tp>::value;
template 
_LIBCPP_INLINE_VAR constexpr bool is_simd_v = is_simd<_Tp>::value;
template 
_LIBCPP_INLINE_VAR constexpr bool is_simd_mask_v = is_simd_mask<_Tp>::value;
template 
_LIBCPP_INLINE_VAR constexpr bool is_simd_flag_type_v =
    is_simd_flag_type<_Tp>::value;
// abi_for_size<_Tp, _Np>::type names an ABI tag for _Np elements of _Tp.
// This implementation ignores _Tp and always yields simd_abi::fixed_size<_Np>.
template <class _Tp, size_t _Np>
struct abi_for_size {
  using type = simd_abi::fixed_size<_Np>;
};
// Shorthand for abi_for_size<_Tp, _Np>::type.
template <class _Tp, size_t _Np>
using abi_for_size_t = typename abi_for_size<_Tp, _Np>::type;

template >
struct simd_size;

template 
struct simd_size<_Tp, __simd_abi<__kind, _Np>>
    : std::integral_constant {
  static_assert(
      std::is_arithmetic<_Tp>::value &&
          !std::is_same::type, bool>::value,
      "Element type should be vectorizable");
};

// TODO: implement it.
// Declared only; no definition or specialization is provided yet. _Up defaults
// to the value_type of _Tp.
template <class _Tp, class _Up = typename _Tp::value_type>
struct memory_alignment;

template >
_LIBCPP_INLINE_VAR constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value;

template 
_LIBCPP_INLINE_VAR constexpr size_t memory_alignment_v =
    memory_alignment<_Tp, _Up>::value;

// class template simd [simd.class]
template 
using native_simd = simd<_Tp, simd_abi::native<_Tp>>;
template 
using fixed_size_simd = simd<_Tp, simd_abi::fixed_size<_Np>>;

// class template simd_mask [simd.mask.class]
template 
using native_simd_mask = simd_mask<_Tp, simd_abi::native<_Tp>>;

template 
using fixed_size_simd_mask = simd_mask<_Tp, simd_abi::fixed_size<_Np>>;

// casts [simd.casts]
template 
struct __static_simd_cast_traits {
  template 
  static simd<_Tp, _Abi> __apply(const simd<_Up, _Abi>& __v);
};

template 
struct __static_simd_cast_traits> {
  template 
  static typename std::enable_if::size() ==
                                     simd<_Tp, _NewAbi>::size(),
                                 simd<_Tp, _NewAbi>>::type
  __apply(const simd<_Up, _Abi>& __v);
};

template 
struct __simd_cast_traits {
  template 
  static typename std::enable_if<
      __is_non_narrowing_arithmetic_convertible<_Up, _Tp>(),
      simd<_Tp, _Abi>>::type
  __apply(const simd<_Up, _Abi>& __v);
};

template 
struct __simd_cast_traits> {
  template 
  static typename std::enable_if<
      __is_non_narrowing_arithmetic_convertible<_Up, _Tp>() &&
          simd<_Up, _Abi>::size() == simd<_Tp, _NewAbi>::size(),
      simd<_Tp, _NewAbi>>::type
  __apply(const simd<_Up, _Abi>& __v);
};

template 
auto simd_cast(const simd<_Up, _Abi>& __v)
    -> decltype(__simd_cast_traits<_Tp>::__apply(__v)) {
  return __simd_cast_traits<_Tp>::__apply(__v);
}

template 
auto static_simd_cast(const simd<_Up, _Abi>& __v)
    -> decltype(__static_simd_cast_traits<_Tp>::__apply(__v)) {
  return __static_simd_cast_traits<_Tp>::__apply(__v);
}

template 
fixed_size_simd<_Tp, simd_size<_Tp, _Abi>::value>
to_fixed_size(const simd<_Tp, _Abi>&) noexcept;

template 
fixed_size_simd_mask<_Tp, simd_size<_Tp, _Abi>::value>
to_fixed_size(const simd_mask<_Tp, _Abi>&) noexcept;

template 
native_simd<_Tp> to_native(const fixed_size_simd<_Tp, _Np>&) noexcept;

template 
native_simd_mask<_Tp> to_native(const fixed_size_simd_mask<_Tp, _Np>&) noexcept;

template 
simd<_Tp> to_compatible(const fixed_size_simd<_Tp, _Np>&) noexcept;

template 
simd_mask<_Tp> to_compatible(const fixed_size_simd_mask<_Tp, _Np>&) noexcept;

template 
tuple>...> split(const simd<_Tp, _Abi>&);

template 
tuple>...>
split(const simd_mask<_Tp, _Abi>&);

template 
array<_SimdType, simd_size::value /
                     _SimdType::size()>
split(const simd&);

template 
array<_SimdType, simd_size::value /
                     _SimdType::size()>
split(const simd_mask&);

template 
simd<_Tp, abi_for_size_t<_Tp, __variadic_sum(simd_size<_Tp, _Abis>::value...)>>
concat(const simd<_Tp, _Abis>&...);

template 
simd_mask<_Tp,
          abi_for_size_t<_Tp, __variadic_sum(simd_size<_Tp, _Abis>::value...)>>
concat(const simd_mask<_Tp, _Abis>&...);

// reductions [simd.mask.reductions]
// Mask reduction functions (declarations only). Each comes in a simd_mask
// flavour and a plain bool flavour.
template <class _Tp, class _Abi>
bool all_of(const simd_mask<_Tp, _Abi>&) noexcept;
template <class _Tp, class _Abi>
bool any_of(const simd_mask<_Tp, _Abi>&) noexcept;
template <class _Tp, class _Abi>
bool none_of(const simd_mask<_Tp, _Abi>&) noexcept;
template <class _Tp, class _Abi>
bool some_of(const simd_mask<_Tp, _Abi>&) noexcept;
template <class _Tp, class _Abi>
int popcount(const simd_mask<_Tp, _Abi>&) noexcept;
// Unlike their bool counterparts below, the two mask find_* functions are not
// declared noexcept.
template <class _Tp, class _Abi>
int find_first_set(const simd_mask<_Tp, _Abi>&);
template <class _Tp, class _Abi>
int find_last_set(const simd_mask<_Tp, _Abi>&);
bool all_of(bool) noexcept;
bool any_of(bool) noexcept;
bool none_of(bool) noexcept;
bool some_of(bool) noexcept;
int popcount(bool) noexcept;
int find_first_set(bool) noexcept;
int find_last_set(bool) noexcept;

// masked assignment [simd.whereexpr]
template 
class const_where_expression;
template 
class where_expression;

// masked assignment [simd.mask.where]
template 
where_expression, simd<_Tp, _Abi>>
where(const typename simd<_Tp, _Abi>::mask_type&, simd<_Tp, _Abi>&) noexcept;

template 
const_where_expression, const simd<_Tp, _Abi>>
where(const typename simd<_Tp, _Abi>::mask_type&,
      const simd<_Tp, _Abi>&) noexcept;

template 
where_expression, simd_mask<_Tp, _Abi>>
where(const typename __nodeduce>::type&,
      simd_mask<_Tp, _Abi>&) noexcept;

template 
const_where_expression, const simd_mask<_Tp, _Abi>>
where(const typename __nodeduce>::type&,
      const simd_mask<_Tp, _Abi>&) noexcept;

template 
where_expression where(bool, _Tp&) noexcept;

template 
const_where_expression where(bool, const _Tp&) noexcept;

// reductions [simd.reductions]
template >
_Tp reduce(const simd<_Tp, _Abi>&, _BinaryOp = _BinaryOp());

template 
typename _SimdType::value_type
reduce(const const_where_expression<_MaskType, _SimdType>&,
       typename _SimdType::value_type neutral_element, _BinaryOp binary_op);

template 
typename _SimdType::value_type
reduce(const const_where_expression<_MaskType, _SimdType>&,
       plus binary_op = {});

template 
typename _SimdType::value_type
reduce(const const_where_expression<_MaskType, _SimdType>&,
       multiplies binary_op);

template 
typename _SimdType::value_type
reduce(const const_where_expression<_MaskType, _SimdType>&,
       bit_and binary_op);

template 
typename _SimdType::value_type
reduce(const const_where_expression<_MaskType, _SimdType>&,
       bit_or binary_op);

template 
typename _SimdType::value_type
reduce(const const_where_expression<_MaskType, _SimdType>&,
       bit_xor binary_op);

template 
_Tp hmin(const simd<_Tp, _Abi>&);
template 
typename _SimdType::value_type
hmin(const const_where_expression<_MaskType, _SimdType>&);
template 
_Tp hmax(const simd<_Tp, _Abi>&);
template 
typename _SimdType::value_type
hmax(const const_where_expression<_MaskType, _SimdType>&);

// algorithms [simd.alg]
template 
simd<_Tp, _Abi> min(const simd<_Tp, _Abi>&, const simd<_Tp, _Abi>&) noexcept;

template 
simd<_Tp, _Abi> max(const simd<_Tp, _Abi>&, const simd<_Tp, _Abi>&) noexcept;

template 
std::pair, simd<_Tp, _Abi>>
minmax(const simd<_Tp, _Abi>&, const simd<_Tp, _Abi>&) noexcept;

template 
simd<_Tp, _Abi> clamp(const simd<_Tp, _Abi>&, const simd<_Tp, _Abi>&,
                      const simd<_Tp, _Abi>&);

// [simd.whereexpr]
// TODO implement where expressions.
// Read-only where-expression. Copy construction and copy assignment are
// deleted; the remaining members are declared but not defined here and are
// callable only on rvalues (const&& qualified).
template <class _MaskType, class _Tp>
class const_where_expression {
public:
  const_where_expression(const const_where_expression&) = delete;
  const_where_expression& operator=(const const_where_expression&) = delete;
  typename remove_const<_Tp>::type operator-() const&&;
  template <class _Up, class _Flags>
  void copy_to(_Up*, _Flags) const&&;
};

// Mutable where-expression: extends const_where_expression with assignment,
// compound assignment, increment/decrement and copy_from. Copying is deleted.
// All members are declared but not defined here.
template <class _MaskType, class _Tp>
class where_expression : public const_where_expression<_MaskType, _Tp> {
public:
  where_expression(const where_expression&) = delete;
  where_expression& operator=(const where_expression&) = delete;

  // Assignment and compound assignment from any operand type; all return void.
  template <class _Up>
  void operator=(_Up&&);
  template <class _Up>
  void operator+=(_Up&&);
  template <class _Up>
  void operator-=(_Up&&);
  template <class _Up>
  void operator*=(_Up&&);
  template <class _Up>
  void operator/=(_Up&&);
  template <class _Up>
  void operator%=(_Up&&);
  template <class _Up>
  void operator&=(_Up&&);
  template <class _Up>
  void operator|=(_Up&&);
  template <class _Up>
  void operator^=(_Up&&);
  template <class _Up>
  void operator<<=(_Up&&);
  template <class _Up>
  void operator>>=(_Up&&);

  // Prefix and postfix increment / decrement; all return void.
  void operator++();
  void operator++(int);
  void operator--();
  void operator--(int);

  template <class _Up, class _Flags>
  void copy_from(const _Up*, _Flags);
};

// [simd.class]
// TODO: implement simd
template 
class simd {
public:
  using value_type = _Tp;
  using reference = __simd_reference<_Tp, _Tp, _Abi>;
  using mask_type = simd_mask<_Tp, _Abi>;
  using abi_type = _Abi;

  simd() = default;
  simd(const simd&) = default;
  simd& operator=(const simd&) = default;

  static constexpr size_t size() noexcept {
    return simd_size<_Tp, _Abi>::value;
  }

private:
  __simd_storage<_Tp, _Abi> __s_;

  template 
  static constexpr bool __can_broadcast() {
    return (std::is_arithmetic<_Up>::value &&
            __is_non_narrowing_arithmetic_convertible<_Up, _Tp>()) ||
           (!std::is_arithmetic<_Up>::value &&
            std::is_convertible<_Up, _Tp>::value) ||
           std::is_same::type, int>::value ||
           (std::is_same::type,
                         unsigned int>::value &&
            std::is_unsigned<_Tp>::value);
  }

  template 
  static constexpr decltype(
      std::forward_as_tuple(std::declval<_Generator>()(
          std::integral_constant())...),
      bool())
  __can_generate(std::index_sequence<__indicies...>) {
    return !__variadic_sum(
        !__can_broadcast()(
            std::integral_constant()))>()...);
  }

  template 
  static bool __can_generate(...) {
    return false;
  }

  template 
  void __generator_init(_Generator&& __g, std::index_sequence<__indicies...>) {
    int __not_used[]{((*this)[__indicies] =
                          __g(std::integral_constant()),
                      0)...};
    (void)__not_used;
  }

public:
  // implicit type conversion constructor
  template >::value &&
                __is_non_narrowing_arithmetic_convertible<_Up, _Tp>()>::type>
  simd(const simd<_Up, simd_abi::fixed_size>& __v) {
    for (size_t __i = 0; __i < size(); __i++) {
      (*this)[__i] = static_cast<_Tp>(__v[__i]);
    }
  }

  // implicit broadcast constructor
  template ()>::type>
  simd(_Up&& __rv) {
    auto __v = static_cast<_Tp>(__rv);
    for (size_t __i = 0; __i < size(); __i++) {
      (*this)[__i] = __v;
    }
  }

  // generator constructor
  template (std::make_index_sequence()),
                int>::type()>
  explicit simd(_Generator&& __g) {
    __generator_init(std::forward<_Generator>(__g),
                     std::make_index_sequence());
  }

  // load constructor
  template <
      class _Up, class _Flags,
      class = typename std::enable_if<__vectorizable<_Up>()>::type,
      class = typename std::enable_if::value>::type>
  simd(const _Up* __buffer, _Flags) {
    // TODO: optimize for overaligned flags
    for (size_t __i = 0; __i < size(); __i++) {
      (*this)[__i] = static_cast<_Tp>(__buffer[__i]);
    }
  }

  // loads [simd.load]
  template 
  typename std::enable_if<__vectorizable<_Up>() &&
                          is_simd_flag_type<_Flags>::value>::type
  copy_from(const _Up* __buffer, _Flags) {
    *this = simd(__buffer, _Flags());
  }

  // stores [simd.store]
  template 
  typename std::enable_if<__vectorizable<_Up>() &&
                          is_simd_flag_type<_Flags>::value>::type
  copy_to(_Up* __buffer, _Flags) const {
    // TODO: optimize for overaligned flags
    for (size_t __i = 0; __i < size(); __i++) {
      __buffer[__i] = static_cast<_Up>((*this)[__i]);
    }
  }

  // scalar access [simd.subscr]
  reference operator[](size_t __i) { return reference(&__s_, __i); }

  value_type operator[](size_t __i) const { return __s_.__get(__i); }

  // unary operators [simd.unary]
  simd& operator++();
  simd operator++(int);
  simd& operator--();
  simd operator--(int);
  mask_type operator!() const;
  simd operator~() const;
  simd operator+() const;
  simd operator-() const;

  // binary operators [simd.binary]
  friend simd operator+(const simd&, const simd&);
  friend simd operator-(const simd&, const simd&);
  friend simd operator*(const simd&, const simd&);
  friend simd operator/(const simd&, const simd&);
  friend simd operator%(const simd&, const simd&);
  friend simd operator&(const simd&, const simd&);
  friend simd operator|(const simd&, const simd&);
  friend simd operator^(const simd&, const simd&);
  friend simd operator<<(const simd&, const simd&);
  friend simd operator>>(const simd&, const simd&);
  friend simd operator<<(const simd&, int);
  friend simd operator>>(const simd&, int);

  // compound assignment [simd.cassign]
  friend simd& operator+=(simd&, const simd&);
  friend simd& operator-=(simd&, const simd&);
  friend simd& operator*=(simd&, const simd&);
  friend simd& operator/=(simd&, const simd&);
  friend simd& operator%=(simd&, const simd&);

  friend simd& operator&=(simd&, const simd&);
  friend simd& operator|=(simd&, const simd&);
  friend simd& operator^=(simd&, const simd&);
  friend simd& operator<<=(simd&, const simd&);
  friend simd& operator>>=(simd&, const simd&);
  friend simd& operator<<=(simd&, int);
  friend simd& operator>>=(simd&, int);

  // compares [simd.comparison]
  friend mask_type operator==(const simd&, const simd&);
  friend mask_type operator!=(const simd&, const simd&);
  friend mask_type operator>=(const simd&, const simd&);
  friend mask_type operator<=(const simd&, const simd&);
  friend mask_type operator>(const simd&, const simd&);
  friend mask_type operator<(const simd&, const simd&);
};

// [simd.mask.class]
template 
// TODO: implement simd_mask
class simd_mask {
public:
  using value_type = bool;
  // TODO: this is strawman implementation. Turn it into a proxy type.
  using reference = bool&;
  using simd_type = simd<_Tp, _Abi>;
  using abi_type = _Abi;
  static constexpr size_t size() noexcept;
  simd_mask() = default;

  // broadcast constructor
  explicit simd_mask(value_type) noexcept;

  // implicit type conversion constructor
  template 
  simd_mask(const simd_mask<_Up, simd_abi::fixed_size>&) noexcept;

  // load constructor
  template 
  simd_mask(const value_type*, _Flags);

  // loads [simd.mask.copy]
  template 
  void copy_from(const value_type*, _Flags);
  template 
  void copy_to(value_type*, _Flags) const;

  // scalar access [simd.mask.subscr]
  reference operator[](size_t);
  value_type operator[](size_t) const;

  // unary operators [simd.mask.unary]
  simd_mask operator!() const noexcept;

  // simd_mask binary operators [simd.mask.binary]
  friend simd_mask operator&&(const simd_mask&, const simd_mask&) noexcept;
  friend simd_mask operator||(const simd_mask&, const simd_mask&) noexcept;
  friend simd_mask operator&(const simd_mask&, const simd_mask&)noexcept;
  friend simd_mask operator|(const simd_mask&, const simd_mask&) noexcept;
  friend simd_mask operator^(const simd_mask&, const simd_mask&) noexcept;

  // simd_mask compound assignment [simd.mask.cassign]
  friend simd_mask& operator&=(simd_mask&, const simd_mask&) noexcept;
  friend simd_mask& operator|=(simd_mask&, const simd_mask&) noexcept;
  friend simd_mask& operator^=(simd_mask&, const simd_mask&) noexcept;

  // simd_mask compares [simd.mask.comparison]
  friend simd_mask operator==(const simd_mask&, const simd_mask&) noexcept;
  friend simd_mask operator!=(const simd_mask&, const simd_mask&) noexcept;
};

#endif // _LIBCPP_STD_VER >= 17

_LIBCPP_END_NAMESPACE_EXPERIMENTAL_SIMD

#endif /* _LIBCPP_EXPERIMENTAL_SIMD */