; Source: src/third_party/llvm-project/polly/www/experiments/matmul/matmul.s - cobalt - Git at Google

 ; ModuleID = 'matmul.c'
 source_filename = "matmul.c"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"

 ; Struct layouts needed to type @stdout and the first parameter of @fprintf.
 ; The two types reference themselves and each other only through pointers.
 ; NOTE(review): presumably mirrors the build host's C library FILE and
 ; _IO_marker layouts - confirm before relying on individual field positions.
 %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
 %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }

 ; Input matrices: 1536 x 1536 floats each, zero-initialized, 16-byte aligned.
 ; Both are written by @init_array and read by @main.
 @A = common global [1536 x [1536 x float]] zeroinitializer, align 16
 @B = common global [1536 x [1536 x float]] zeroinitializer, align 16
 ; Output stream pointer, defined outside this module; loaded by @print_array.
 @stdout = external global %struct._IO_FILE*, align 8
 ; NUL-terminated format string "%lf " used for each printed element.
 @.str = private unnamed_addr constant [5 x i8] c"%lf \00", align 1
 ; Result matrix: written by @main, read by @print_array.
 @C = common global [1536 x [1536 x float]] zeroinitializer, align 16
 ; NUL-terminated string holding one byte 0x0A, used for line breaks.
 @.str.1 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1

 ; Function Attrs: nounwind uwtable
 ; Fill both input matrices. For every row and col in [0, 1536) the element
 ; [row][col] of @A, and then of @B, is set to
 ;     (float)((double)(1 + (row * col) % 1024) / 2.0)
 ; The value is computed separately for each matrix. Takes no arguments and
 ; returns nothing; both loop counters live in stack slots and are reloaded
 ; before every use.
 define void @init_array() #0 {
 entry:
   %row.addr = alloca i32, align 4
   %col.addr = alloca i32, align 4
   store i32 0, i32* %row.addr, align 4
   br label %rows.check

 rows.check:  ; preds = %rows.next, %entry
   %row.cur = load i32, i32* %row.addr, align 4
   %row.inrange = icmp slt i32 %row.cur, 1536
   br i1 %row.inrange, label %rows.body, label %rows.done

 rows.body:  ; preds = %rows.check
   ; Restart the column counter for this row.
   store i32 0, i32* %col.addr, align 4
   br label %cols.check

 cols.check:  ; preds = %cols.next, %rows.body
   %col.cur = load i32, i32* %col.addr, align 4
   %col.inrange = icmp slt i32 %col.cur, 1536
   br i1 %col.inrange, label %cols.body, label %cols.done

 cols.body:  ; preds = %cols.check
   ; Value for @A: integer product, signed remainder by 1024, plus one,
   ; halved in double precision and narrowed to float.
   %a.row = load i32, i32* %row.addr, align 4
   %a.col = load i32, i32* %col.addr, align 4
   %a.prod = mul nsw i32 %a.row, %a.col
   %a.mod = srem i32 %a.prod, 1024
   %a.biased = add nsw i32 1, %a.mod
   %a.wide = sitofp i32 %a.biased to double
   %a.half = fdiv double %a.wide, 2.000000e+00
   %a.val = fptrunc double %a.half to float
   ; Address of @A[row][col].
   %a.col.raw = load i32, i32* %col.addr, align 4
   %a.col.idx = sext i32 %a.col.raw to i64
   %a.row.raw = load i32, i32* %row.addr, align 4
   %a.row.idx = sext i32 %a.row.raw to i64
   %a.rowptr = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %a.row.idx
   %a.slot = getelementptr inbounds [1536 x float], [1536 x float]* %a.rowptr, i64 0, i64 %a.col.idx
   store float %a.val, float* %a.slot, align 4
   ; Same computation again, this time stored to @B[row][col].
   %b.row = load i32, i32* %row.addr, align 4
   %b.col = load i32, i32* %col.addr, align 4
   %b.prod = mul nsw i32 %b.row, %b.col
   %b.mod = srem i32 %b.prod, 1024
   %b.biased = add nsw i32 1, %b.mod
   %b.wide = sitofp i32 %b.biased to double
   %b.half = fdiv double %b.wide, 2.000000e+00
   %b.val = fptrunc double %b.half to float
   %b.col.raw = load i32, i32* %col.addr, align 4
   %b.col.idx = sext i32 %b.col.raw to i64
   %b.row.raw = load i32, i32* %row.addr, align 4
   %b.row.idx = sext i32 %b.row.raw to i64
   %b.rowptr = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %b.row.idx
   %b.slot = getelementptr inbounds [1536 x float], [1536 x float]* %b.rowptr, i64 0, i64 %b.col.idx
   store float %b.val, float* %b.slot, align 4
   br label %cols.next

 cols.next:  ; preds = %cols.body
   %col.old = load i32, i32* %col.addr, align 4
   %col.bumped = add nsw i32 %col.old, 1
   store i32 %col.bumped, i32* %col.addr, align 4
   br label %cols.check

 cols.done:  ; preds = %cols.check
   br label %rows.next

 rows.next:  ; preds = %cols.done
   %row.old = load i32, i32* %row.addr, align 4
   %row.bumped = add nsw i32 %row.old, 1
   store i32 %row.bumped, i32* %row.addr, align 4
   br label %rows.check

 rows.done:  ; preds = %rows.check
   ret void
 }

 ; Function Attrs: nounwind uwtable
 ; Print all of @C through @fprintf on the stream loaded from @stdout.
 ; Elements are visited row by row; each one is widened to double and
 ; printed with the "%lf " format. A newline is written after every column
 ; whose index satisfies col % 80 == 79, and once more after each row.
 ; Takes no arguments and returns nothing; the @fprintf results are unused.
 define void @print_array() #0 {
 entry:
   %row.addr = alloca i32, align 4
   %col.addr = alloca i32, align 4
   store i32 0, i32* %row.addr, align 4
   br label %rows.check

 rows.check:  ; preds = %rows.next, %entry
   %row.cur = load i32, i32* %row.addr, align 4
   %row.inrange = icmp slt i32 %row.cur, 1536
   br i1 %row.inrange, label %rows.body, label %rows.done

 rows.body:  ; preds = %rows.check
   store i32 0, i32* %col.addr, align 4
   br label %cols.check

 cols.check:  ; preds = %cols.next, %rows.body
   %col.cur = load i32, i32* %col.addr, align 4
   %col.inrange = icmp slt i32 %col.cur, 1536
   br i1 %col.inrange, label %emit.elem, label %row.finish

 emit.elem:  ; preds = %cols.check
   ; The stream pointer is reloaded from @stdout before each call.
   %out = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
   %col.raw = load i32, i32* %col.addr, align 4
   %col.idx = sext i32 %col.raw to i64
   %row.raw = load i32, i32* %row.addr, align 4
   %row.idx = sext i32 %row.raw to i64
   %c.rowptr = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %row.idx
   %c.slot = getelementptr inbounds [1536 x float], [1536 x float]* %c.rowptr, i64 0, i64 %col.idx
   %elem = load float, float* %c.slot, align 4
   ; The float is extended to double before being passed as a variadic argument.
   %elem.wide = fpext float %elem to double
   %printed = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %out, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), double %elem.wide)
   ; Break the output line after columns 79, 159, 239, ...
   %col.again = load i32, i32* %col.addr, align 4
   %col.mod80 = srem i32 %col.again, 80
   %wrap.now = icmp eq i32 %col.mod80, 79
   br i1 %wrap.now, label %wrap.line, label %wrap.skip

 wrap.line:  ; preds = %emit.elem
   %out.wrap = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
   %wrapped = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %out.wrap, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0))
   br label %wrap.skip

 wrap.skip:  ; preds = %wrap.line, %emit.elem
   br label %cols.next

 cols.next:  ; preds = %wrap.skip
   %col.old = load i32, i32* %col.addr, align 4
   %col.bumped = add nsw i32 %col.old, 1
   store i32 %col.bumped, i32* %col.addr, align 4
   br label %cols.check

 row.finish:  ; preds = %cols.check
   ; End-of-row newline.
   %out.eol = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
   %ended = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %out.eol, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0))
   br label %rows.next

 rows.next:  ; preds = %row.finish
   %row.old = load i32, i32* %row.addr, align 4
   %row.bumped = add nsw i32 %row.old, 1
   store i32 %row.bumped, i32* %row.addr, align 4
   br label %rows.check

 rows.done:  ; preds = %rows.check
   ret void
 }

 ; External variadic function called by @print_array: takes a stream pointer
 ; and a format-string pointer, followed by variadic arguments; returns i32.
 declare i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1

 ; Function Attrs: nounwind uwtable
 ; Program entry: calls @init_array, then computes C = A * B with three
 ; nested loops over [0, 1536). For each (row, col) the cell @C[row][col] is
 ; zeroed and then updated in memory once per k:
 ;     C[row][col] = C[row][col] + A[row][k] * B[k][col]
 ; Always returns 0. The two double slots are allocated but never accessed,
 ; and the return-value slot is written once and never read.
 define i32 @main() #0 {
 entry:
   %ret.slot = alloca i32, align 4
   %row.addr = alloca i32, align 4
   %col.addr = alloca i32, align 4
   %red.addr = alloca i32, align 4
   %timer.begin = alloca double, align 8
   %timer.end = alloca double, align 8
   store i32 0, i32* %ret.slot, align 4
   call void @init_array()
   store i32 0, i32* %row.addr, align 4
   br label %rows.check

 rows.check:  ; preds = %rows.next, %entry
   %row.cur = load i32, i32* %row.addr, align 4
   %row.inrange = icmp slt i32 %row.cur, 1536
   br i1 %row.inrange, label %rows.body, label %rows.done

 rows.body:  ; preds = %rows.check
   store i32 0, i32* %col.addr, align 4
   br label %cols.check

 cols.check:  ; preds = %cols.next, %rows.body
   %col.cur = load i32, i32* %col.addr, align 4
   %col.inrange = icmp slt i32 %col.cur, 1536
   br i1 %col.inrange, label %cell.init, label %cols.done

 cell.init:  ; preds = %cols.check
   ; C[row][col] = 0 before the reduction over k starts.
   %z.col.raw = load i32, i32* %col.addr, align 4
   %z.col.idx = sext i32 %z.col.raw to i64
   %z.row.raw = load i32, i32* %row.addr, align 4
   %z.row.idx = sext i32 %z.row.raw to i64
   %z.rowptr = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %z.row.idx
   %z.slot = getelementptr inbounds [1536 x float], [1536 x float]* %z.rowptr, i64 0, i64 %z.col.idx
   store float 0.000000e+00, float* %z.slot, align 4
   store i32 0, i32* %red.addr, align 4
   br label %red.check

 red.check:  ; preds = %red.next, %cell.init
   %red.cur = load i32, i32* %red.addr, align 4
   %red.inrange = icmp slt i32 %red.cur, 1536
   br i1 %red.inrange, label %red.body, label %red.done

 red.body:  ; preds = %red.check
   ; Current partial sum, read back from C[row][col].
   %acc.col.raw = load i32, i32* %col.addr, align 4
   %acc.col.idx = sext i32 %acc.col.raw to i64
   %acc.row.raw = load i32, i32* %row.addr, align 4
   %acc.row.idx = sext i32 %acc.row.raw to i64
   %acc.rowptr = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %acc.row.idx
   %acc.slot = getelementptr inbounds [1536 x float], [1536 x float]* %acc.rowptr, i64 0, i64 %acc.col.idx
   %acc = load float, float* %acc.slot, align 4
   ; Left operand A[row][k].
   %lhs.k.raw = load i32, i32* %red.addr, align 4
   %lhs.k.idx = sext i32 %lhs.k.raw to i64
   %lhs.row.raw = load i32, i32* %row.addr, align 4
   %lhs.row.idx = sext i32 %lhs.row.raw to i64
   %lhs.rowptr = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %lhs.row.idx
   %lhs.slot = getelementptr inbounds [1536 x float], [1536 x float]* %lhs.rowptr, i64 0, i64 %lhs.k.idx
   %lhs = load float, float* %lhs.slot, align 4
   ; Right operand B[k][col].
   %rhs.col.raw = load i32, i32* %col.addr, align 4
   %rhs.col.idx = sext i32 %rhs.col.raw to i64
   %rhs.k.raw = load i32, i32* %red.addr, align 4
   %rhs.k.idx = sext i32 %rhs.k.raw to i64
   %rhs.rowptr = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %rhs.k.idx
   %rhs.slot = getelementptr inbounds [1536 x float], [1536 x float]* %rhs.rowptr, i64 0, i64 %rhs.col.idx
   %rhs = load float, float* %rhs.slot, align 4
   ; Separate multiply and add in single precision.
   %term = fmul float %lhs, %rhs
   %acc.next = fadd float %acc, %term
   ; Write the updated sum back to C[row][col].
   %out.col.raw = load i32, i32* %col.addr, align 4
   %out.col.idx = sext i32 %out.col.raw to i64
   %out.row.raw = load i32, i32* %row.addr, align 4
   %out.row.idx = sext i32 %out.row.raw to i64
   %out.rowptr = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %out.row.idx
   %out.slot = getelementptr inbounds [1536 x float], [1536 x float]* %out.rowptr, i64 0, i64 %out.col.idx
   store float %acc.next, float* %out.slot, align 4
   br label %red.next

 red.next:  ; preds = %red.body
   %red.old = load i32, i32* %red.addr, align 4
   %red.bumped = add nsw i32 %red.old, 1
   store i32 %red.bumped, i32* %red.addr, align 4
   br label %red.check

 red.done:  ; preds = %red.check
   br label %cols.next

 cols.next:  ; preds = %red.done
   %col.old = load i32, i32* %col.addr, align 4
   %col.bumped = add nsw i32 %col.old, 1
   store i32 %col.bumped, i32* %col.addr, align 4
   br label %cols.check

 cols.done:  ; preds = %cols.check
   br label %rows.next

 rows.next:  ; preds = %cols.done
   %row.old = load i32, i32* %row.addr, align 4
   %row.bumped = add nsw i32 %row.old, 1
   store i32 %row.bumped, i32* %row.addr, align 4
   br label %rows.check

 rows.done:  ; preds = %rows.check
   ret i32 0
 }

 ; Attribute group #0 is attached to the three function definitions
 ; (@init_array, @print_array, @main). Group #1 is attached to the @fprintf
 ; declaration; it lacks nounwind, uwtable and "no-jump-tables" but is
 ; otherwise the same list.
 attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }

 !llvm.ident = !{!0}

 !0 = !{!"clang version 4.0.0 (http://llvm.org/git/clang.git 081569d9a29c7bc827b2d41f8e62891bbc895e2f) (http://llvm.org/git/llvm.git e117e506536626352e8e47f6c72cd6e2a276622c)"}
	; ModuleID = 'matmul.c'
	source_filename = "matmul.c"
	target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
	target triple = "x86_64-unknown-linux-gnu"

	; Struct layouts needed to type @stdout and the first parameter of @fprintf.
	; The self- and cross-references must be pointers: a struct that contains
	; itself (or its container) by value has no finite size and is rejected by
	; the IR parser. The pointer fields below restore that.
	; NOTE(review): presumably mirrors the build host's C library FILE and
	; _IO_marker layouts - confirm before relying on individual field positions.
	%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
	%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }

	; Input matrices: 1536 x 1536 floats each, zero-initialized, 16-byte aligned.
	; Both are written by @init_array and read by @main.
	@A = common global [1536 x [1536 x float]] zeroinitializer, align 16
	@B = common global [1536 x [1536 x float]] zeroinitializer, align 16
	; Output stream pointer, defined outside this module; loaded by @print_array.
	@stdout = external global %struct._IO_FILE*, align 8
	; NUL-terminated format string "%lf " used for each printed element.
	@.str = private unnamed_addr constant [5 x i8] c"%lf \00", align 1
	; Result matrix: written by @main, read by @print_array.
	@C = common global [1536 x [1536 x float]] zeroinitializer, align 16
	; NUL-terminated string holding one byte 0x0A, used for line breaks.
	@.str.1 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1

	; Function Attrs: nounwind uwtable
	; Fill both input matrices. For every row and col in [0, 1536) the element
	; [row][col] of @A, and then of @B, is set to
	;     (float)((double)(1 + (row * col) % 1024) / 2.0)
	; The value is computed separately for each matrix. Takes no arguments and
	; returns nothing; both loop counters live in stack slots and are reloaded
	; before every use.
	define void @init_array() #0 {
	entry:
	  %row.addr = alloca i32, align 4
	  %col.addr = alloca i32, align 4
	  store i32 0, i32* %row.addr, align 4
	  br label %rows.check

	rows.check:  ; preds = %rows.next, %entry
	  %row.cur = load i32, i32* %row.addr, align 4
	  %row.inrange = icmp slt i32 %row.cur, 1536
	  br i1 %row.inrange, label %rows.body, label %rows.done

	rows.body:  ; preds = %rows.check
	  ; Restart the column counter for this row.
	  store i32 0, i32* %col.addr, align 4
	  br label %cols.check

	cols.check:  ; preds = %cols.next, %rows.body
	  %col.cur = load i32, i32* %col.addr, align 4
	  %col.inrange = icmp slt i32 %col.cur, 1536
	  br i1 %col.inrange, label %cols.body, label %cols.done

	cols.body:  ; preds = %cols.check
	  ; Value for @A: integer product, signed remainder by 1024, plus one,
	  ; halved in double precision and narrowed to float.
	  %a.row = load i32, i32* %row.addr, align 4
	  %a.col = load i32, i32* %col.addr, align 4
	  %a.prod = mul nsw i32 %a.row, %a.col
	  %a.mod = srem i32 %a.prod, 1024
	  %a.biased = add nsw i32 1, %a.mod
	  %a.wide = sitofp i32 %a.biased to double
	  %a.half = fdiv double %a.wide, 2.000000e+00
	  %a.val = fptrunc double %a.half to float
	  ; Address of @A[row][col].
	  %a.col.raw = load i32, i32* %col.addr, align 4
	  %a.col.idx = sext i32 %a.col.raw to i64
	  %a.row.raw = load i32, i32* %row.addr, align 4
	  %a.row.idx = sext i32 %a.row.raw to i64
	  %a.rowptr = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %a.row.idx
	  %a.slot = getelementptr inbounds [1536 x float], [1536 x float]* %a.rowptr, i64 0, i64 %a.col.idx
	  store float %a.val, float* %a.slot, align 4
	  ; Same computation again, this time stored to @B[row][col].
	  %b.row = load i32, i32* %row.addr, align 4
	  %b.col = load i32, i32* %col.addr, align 4
	  %b.prod = mul nsw i32 %b.row, %b.col
	  %b.mod = srem i32 %b.prod, 1024
	  %b.biased = add nsw i32 1, %b.mod
	  %b.wide = sitofp i32 %b.biased to double
	  %b.half = fdiv double %b.wide, 2.000000e+00
	  %b.val = fptrunc double %b.half to float
	  %b.col.raw = load i32, i32* %col.addr, align 4
	  %b.col.idx = sext i32 %b.col.raw to i64
	  %b.row.raw = load i32, i32* %row.addr, align 4
	  %b.row.idx = sext i32 %b.row.raw to i64
	  %b.rowptr = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %b.row.idx
	  %b.slot = getelementptr inbounds [1536 x float], [1536 x float]* %b.rowptr, i64 0, i64 %b.col.idx
	  store float %b.val, float* %b.slot, align 4
	  br label %cols.next

	cols.next:  ; preds = %cols.body
	  %col.old = load i32, i32* %col.addr, align 4
	  %col.bumped = add nsw i32 %col.old, 1
	  store i32 %col.bumped, i32* %col.addr, align 4
	  br label %cols.check

	cols.done:  ; preds = %cols.check
	  br label %rows.next

	rows.next:  ; preds = %cols.done
	  %row.old = load i32, i32* %row.addr, align 4
	  %row.bumped = add nsw i32 %row.old, 1
	  store i32 %row.bumped, i32* %row.addr, align 4
	  br label %rows.check

	rows.done:  ; preds = %rows.check
	  ret void
	}

	; Function Attrs: nounwind uwtable
	; Print all of @C through @fprintf on the stream loaded from @stdout.
	; Elements are visited row by row; each one is widened to double and
	; printed with the "%lf " format. A newline is written after every column
	; whose index satisfies j % 80 == 79, and once more after each row.
	; Takes no arguments and returns nothing; the @fprintf results are unused.
	;
	; Typing notes: @stdout is a global of type %struct._IO_FILE*, so the
	; global itself has type %struct._IO_FILE** and every load from it yields
	; a %struct._IO_FILE*. The function type written at each call site must
	; match the @fprintf declaration that follows this definition.
	define void @print_array() #0 {
	entry:
	  %i = alloca i32, align 4
	  %j = alloca i32, align 4
	  store i32 0, i32* %i, align 4
	  br label %for.cond

	for.cond: ; preds = %for.inc10, %entry
	  %0 = load i32, i32* %i, align 4
	  %cmp = icmp slt i32 %0, 1536
	  br i1 %cmp, label %for.body, label %for.end12

	for.body: ; preds = %for.cond
	  store i32 0, i32* %j, align 4
	  br label %for.cond1

	for.cond1: ; preds = %for.inc, %for.body
	  %1 = load i32, i32* %j, align 4
	  %cmp2 = icmp slt i32 %1, 1536
	  br i1 %cmp2, label %for.body3, label %for.end

	for.body3: ; preds = %for.cond1
	  ; Load the stream pointer (not the FILE object itself).
	  %2 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
	  %3 = load i32, i32* %j, align 4
	  %idxprom = sext i32 %3 to i64
	  %4 = load i32, i32* %i, align 4
	  %idxprom4 = sext i32 %4 to i64
	  %arrayidx = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %idxprom4
	  %arrayidx5 = getelementptr inbounds [1536 x float], [1536 x float]* %arrayidx, i64 0, i64 %idxprom
	  %5 = load float, float* %arrayidx5, align 4
	  ; The float is extended to double before being passed as a variadic argument.
	  %conv = fpext float %5 to double
	  %call = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %2, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), double %conv)
	  ; Break the output line after columns 79, 159, 239, ...
	  %6 = load i32, i32* %j, align 4
	  %rem = srem i32 %6, 80
	  %cmp6 = icmp eq i32 %rem, 79
	  br i1 %cmp6, label %if.then, label %if.end

	if.then: ; preds = %for.body3
	  %7 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
	  %call8 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %7, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0))
	  br label %if.end

	if.end: ; preds = %if.then, %for.body3
	  br label %for.inc

	for.inc: ; preds = %if.end
	  %8 = load i32, i32* %j, align 4
	  %inc = add nsw i32 %8, 1
	  store i32 %inc, i32* %j, align 4
	  br label %for.cond1

	for.end: ; preds = %for.cond1
	  ; End-of-row newline.
	  %9 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
	  %call9 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %9, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0))
	  br label %for.inc10

	for.inc10: ; preds = %for.end
	  %10 = load i32, i32* %i, align 4
	  %inc11 = add nsw i32 %10, 1
	  store i32 %inc11, i32* %i, align 4
	  br label %for.cond

	for.end12: ; preds = %for.cond
	  ret void
	}

	; External variadic function called by @print_array: takes a stream pointer
	; and a format-string pointer, followed by variadic arguments; returns i32.
	declare i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1

	; Function Attrs: nounwind uwtable
	; Program entry: calls @init_array, then computes C = A * B with three
	; nested loops over [0, 1536). For each (row, col) the cell @C[row][col] is
	; zeroed and then updated in memory once per k:
	;     C[row][col] = C[row][col] + A[row][k] * B[k][col]
	; Always returns 0. The two double slots are allocated but never accessed,
	; and the return-value slot is written once and never read.
	define i32 @main() #0 {
	entry:
	  %ret.slot = alloca i32, align 4
	  %row.addr = alloca i32, align 4
	  %col.addr = alloca i32, align 4
	  %red.addr = alloca i32, align 4
	  %timer.begin = alloca double, align 8
	  %timer.end = alloca double, align 8
	  store i32 0, i32* %ret.slot, align 4
	  call void @init_array()
	  store i32 0, i32* %row.addr, align 4
	  br label %rows.check

	rows.check:  ; preds = %rows.next, %entry
	  %row.cur = load i32, i32* %row.addr, align 4
	  %row.inrange = icmp slt i32 %row.cur, 1536
	  br i1 %row.inrange, label %rows.body, label %rows.done

	rows.body:  ; preds = %rows.check
	  store i32 0, i32* %col.addr, align 4
	  br label %cols.check

	cols.check:  ; preds = %cols.next, %rows.body
	  %col.cur = load i32, i32* %col.addr, align 4
	  %col.inrange = icmp slt i32 %col.cur, 1536
	  br i1 %col.inrange, label %cell.init, label %cols.done

	cell.init:  ; preds = %cols.check
	  ; C[row][col] = 0 before the reduction over k starts.
	  %z.col.raw = load i32, i32* %col.addr, align 4
	  %z.col.idx = sext i32 %z.col.raw to i64
	  %z.row.raw = load i32, i32* %row.addr, align 4
	  %z.row.idx = sext i32 %z.row.raw to i64
	  %z.rowptr = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %z.row.idx
	  %z.slot = getelementptr inbounds [1536 x float], [1536 x float]* %z.rowptr, i64 0, i64 %z.col.idx
	  store float 0.000000e+00, float* %z.slot, align 4
	  store i32 0, i32* %red.addr, align 4
	  br label %red.check

	red.check:  ; preds = %red.next, %cell.init
	  %red.cur = load i32, i32* %red.addr, align 4
	  %red.inrange = icmp slt i32 %red.cur, 1536
	  br i1 %red.inrange, label %red.body, label %red.done

	red.body:  ; preds = %red.check
	  ; Current partial sum, read back from C[row][col].
	  %acc.col.raw = load i32, i32* %col.addr, align 4
	  %acc.col.idx = sext i32 %acc.col.raw to i64
	  %acc.row.raw = load i32, i32* %row.addr, align 4
	  %acc.row.idx = sext i32 %acc.row.raw to i64
	  %acc.rowptr = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %acc.row.idx
	  %acc.slot = getelementptr inbounds [1536 x float], [1536 x float]* %acc.rowptr, i64 0, i64 %acc.col.idx
	  %acc = load float, float* %acc.slot, align 4
	  ; Left operand A[row][k].
	  %lhs.k.raw = load i32, i32* %red.addr, align 4
	  %lhs.k.idx = sext i32 %lhs.k.raw to i64
	  %lhs.row.raw = load i32, i32* %row.addr, align 4
	  %lhs.row.idx = sext i32 %lhs.row.raw to i64
	  %lhs.rowptr = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @A, i64 0, i64 %lhs.row.idx
	  %lhs.slot = getelementptr inbounds [1536 x float], [1536 x float]* %lhs.rowptr, i64 0, i64 %lhs.k.idx
	  %lhs = load float, float* %lhs.slot, align 4
	  ; Right operand B[k][col].
	  %rhs.col.raw = load i32, i32* %col.addr, align 4
	  %rhs.col.idx = sext i32 %rhs.col.raw to i64
	  %rhs.k.raw = load i32, i32* %red.addr, align 4
	  %rhs.k.idx = sext i32 %rhs.k.raw to i64
	  %rhs.rowptr = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @B, i64 0, i64 %rhs.k.idx
	  %rhs.slot = getelementptr inbounds [1536 x float], [1536 x float]* %rhs.rowptr, i64 0, i64 %rhs.col.idx
	  %rhs = load float, float* %rhs.slot, align 4
	  ; Separate multiply and add in single precision.
	  %term = fmul float %lhs, %rhs
	  %acc.next = fadd float %acc, %term
	  ; Write the updated sum back to C[row][col].
	  %out.col.raw = load i32, i32* %col.addr, align 4
	  %out.col.idx = sext i32 %out.col.raw to i64
	  %out.row.raw = load i32, i32* %row.addr, align 4
	  %out.row.idx = sext i32 %out.row.raw to i64
	  %out.rowptr = getelementptr inbounds [1536 x [1536 x float]], [1536 x [1536 x float]]* @C, i64 0, i64 %out.row.idx
	  %out.slot = getelementptr inbounds [1536 x float], [1536 x float]* %out.rowptr, i64 0, i64 %out.col.idx
	  store float %acc.next, float* %out.slot, align 4
	  br label %red.next

	red.next:  ; preds = %red.body
	  %red.old = load i32, i32* %red.addr, align 4
	  %red.bumped = add nsw i32 %red.old, 1
	  store i32 %red.bumped, i32* %red.addr, align 4
	  br label %red.check

	red.done:  ; preds = %red.check
	  br label %cols.next

	cols.next:  ; preds = %red.done
	  %col.old = load i32, i32* %col.addr, align 4
	  %col.bumped = add nsw i32 %col.old, 1
	  store i32 %col.bumped, i32* %col.addr, align 4
	  br label %cols.check

	cols.done:  ; preds = %cols.check
	  br label %rows.next

	rows.next:  ; preds = %cols.done
	  %row.old = load i32, i32* %row.addr, align 4
	  %row.bumped = add nsw i32 %row.old, 1
	  store i32 %row.bumped, i32* %row.addr, align 4
	  br label %rows.check

	rows.done:  ; preds = %rows.check
	  ret i32 0
	}

	; Attribute group #0 is attached to the three function definitions
	; (@init_array, @print_array, @main). Group #1 is attached to the @fprintf
	; declaration; it lacks nounwind, uwtable and "no-jump-tables" but is
	; otherwise the same list.
	attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
	attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }

	!llvm.ident = !{!0}

	!0 = !{!"clang version 4.0.0 (http://llvm.org/git/clang.git 081569d9a29c7bc827b2d41f8e62891bbc895e2f) (http://llvm.org/git/llvm.git e117e506536626352e8e47f6c72cd6e2a276622c)"}