# In-Class Exercise 6: LLVM-IR Auto-vectorization

Use the following Clang command to compile these functions. Does vectorization occur? What does the result look like? Does the result match your expectations?

Compiler Command:

```
clang -S -emit-llvm -O3 -Rpass=loop-vectorize -Rpass-analysis=loop-vectorize -Rpass-missed=loop-vectorize
```

## Function 1

```c
#include <stddef.h>

void f1(size_t n, float* a, float* b) {
  for (size_t i = 0; i < n; i++)
    a[i] += b[i];
}
```

## Function 2

```c
#include <stddef.h>

void f2(size_t n, float* restrict a, float* b) {
  for (size_t i = 0; i < n; i++)
    a[i] += b[i] < 0 ? b[i] * b[i] : b[i];
}
```

## Function 3

```c
#include <stddef.h>

void f3_1(size_t n, float* restrict a, const float* b) {
  for (size_t i = 4; i < n; i++)
    a[i] = a[i - 1] * b[i];
}

void f3_2(size_t n, float* restrict a, const float* b) {
  for (size_t i = 4; i < n; i++)
    a[i] = a[i - 2] * b[i];
}
```

## Function 4

```c
#include <stddef.h>

void f4(size_t n, float* restrict a, float* b) {
  for (size_t i = 0; i < n; i++)
    a[i] += b[i * 2];
}
```

For this function, also try (separately) the following two additional options: `-mllvm -force-vector-width=4` and `-march=skylake` (if you're on x86-64).