#!/bin/sh
# Builds the Highway MulAdd self-test contained in the here-document below
# (with AddressSanitizer and warnings enabled), runs it, and removes the
# resulting binary "o" when the script exits.
#
# -e: abort on the first failing command; -x: echo every command.
set -ex
# Use -f so the cleanup stays silent when the binary was never produced,
# e.g. because pkg-config or the compiler failed before "o" was written.
trap 'rm -f o' 0
# Despite its name this holds both the compile and the link flags of libhwy.
cflags=$(pkg-config --cflags --libs libhwy)
# $cflags is intentionally unquoted so it splits into separate arguments.
# The here-document delimiter is unquoted as well, so the C++ text must not
# contain dollar signs or backquotes that the shell would expand.
c++ -x c++ - -o o -static-libasan -fsanitize=address -Wall -Wextra -Wpedantic $cflags <<EOF
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include <cstring>

#include <hwy/aligned_allocator.h>
#include <hwy/highway.h>

/* Short alias for the namespace in which the Highway operations used below
 * (ScalableTag, Lanes, Load, MulAdd, Store) are looked up. */
namespace hn = hwy::HWY_NAMESPACE;
/* Element (lane) type shared by every array in this test. */
using T = int;
/* Computes, in place, x_array[i] = mul_array[i] * x_array[i] + add_array[i]
 * for every i in [0, size) using Highway SIMD vectors.
 *
 * mul_array, add_array: inputs holding at least size elements each.
 * size:                 number of elements to process; any value is accepted,
 *                       it need not be a multiple of the vector lane count.
 * x_array:              input/output holding at least size elements.
 *
 * The three arrays must not overlap (HWY_RESTRICT). No particular alignment
 * is required of the pointers. */
void MulAddLoop(const T* HWY_RESTRICT mul_array,
                const T* HWY_RESTRICT add_array,
                const size_t size, T* HWY_RESTRICT x_array) {
  const hn::ScalableTag<T> d;
  const size_t lanes = hn::Lanes(d);

  /* Whole vectors. The unaligned LoadU/StoreU variants are used because the
   * aligned Load/Store may fault unless the pointer is aligned to the vector
   * size, which this function cannot guarantee for arbitrary callers.
   * Invariant: i never exceeds size, so size - i cannot wrap around. */
  size_t i = 0;
  for (; size - i >= lanes; i += lanes) {
    const auto mul = hn::LoadU(d, mul_array + i);
    const auto add = hn::LoadU(d, add_array + i);
    const auto x = hn::LoadU(d, x_array + i);
    hn::StoreU(hn::MulAdd(mul, x, add), d, x_array + i);
  }

  /* Remaining elements (fewer than one vector): handled one at a time so
   * that nothing beyond index size - 1 is ever read or written. */
  for (; i < size; ++i) {
    x_array[i] = mul_array[i] * x_array[i] + add_array[i];
  }
}
int main(){
  /* Initialize arrays with random small integers. */
  srand(time(NULL));
  const hn::ScalableTag<T> d;
  size_t size = hn::Lanes(d)*3;
  T *a      = (T*)malloc(size*sizeof(T));
  T *b      = (T*)malloc(size*sizeof(T));
  T *c      = (T*)malloc(size*sizeof(T));
  T *c_orig = (T*)malloc(size*sizeof(T));
  for(size_t i=0; i<size; i++) {
    a[i] = rand()%9-4;
    b[i] = rand()%9-4;
    c[i] = rand()%9-4;
  }
  memcpy(c_orig, c, size*sizeof(T));

  /* Run MulAdd on them. */
  MulAddLoop(a, b, size, c);

  /* Check that the SIMD function gives the same result as the
   * unserialized math. */
  int ret = 0;
  for(size_t i=0; i<size; i++) {
    auto expected = c_orig[i]*a[i]+b[i];
    printf("%zuth element is %d (%d expected); (a,b,c_orig)=(%d,%d,%d).\n",
           i, c[i], expected, a[i], b[i], c_orig[i]);
    ret += expected != c[i];
  }

  free(c_orig);
  free(c);
  free(b);
  free(a);
  printf("Got %d unexpected results.\n", ret);
  return ret;
}
EOF
# Run the freshly built test. Its exit status is the number of mismatching
# elements, so under "set -e" any mismatch makes the script fail.
./o
