1e209b87ad 
								
							 
						 
						
							
							
								
								remove the partial specialization pass.  It is unmaintained and has bugs.  
							
							... 
							
							
							
							llvm-svn: 123554 
							
						 
						
							2011-01-16 00:27:10 +00:00  
				
					
						
							
							
								 
						
							
								4a1ff16b29 
								
							 
						 
						
							
							
								
								Add missing whitespace.  
							
							... 
							
							
							
							llvm-svn: 123543 
							
						 
						
							2011-01-15 18:42:52 +00:00  
				
					
						
							
							
								 
						
							
								0296a481f9 
								
							 
						 
						
							
							
								
								Make constmerge a two-pass algorithm so that it won't miss merging  
							
							... 
							
							
							
							opportunities. Fixes PR8978.
llvm-svn: 123541 
							
						 
						
							2011-01-15 18:14:21 +00:00  
				
					
						
							
							
								 
						
							
								ed5f2e504e 
								
							 
						 
						
							
							
								
								Try to unbreak selfhost.  
							
							... 
							
							
							
							llvm-svn: 123537 
							
						 
						
							2011-01-15 11:25:34 +00:00  
				
					
						
							
							
								 
						
							
								540f9536c8 
								
							 
						 
						
							
							
								
								Add a cache that protects mergefunc's internals from more surprises in DenseSet.  
							
							... 
							
							
							
							Also, replace tabs with spaces. Yes, it's 2011.
llvm-svn: 123535 
							
						 
						
							2011-01-15 10:16:23 +00:00  
				
					
						
							
							
								 
						
							
								af26390790 
								
							 
						 
						
							
							
								
								temporarily revert r123526.  While working on a follow-on patch I  
							
							... 
							
							
							
							realize that ConstantFoldTerminator doesn't preserve dominfo.
llvm-svn: 123527 
							
						 
						
							2011-01-15 07:51:19 +00:00  
				
					
						
							
							
								 
						
							
								8df83c4a24 
								
							 
						 
						
							
							
								
								fix rdar://8785296 - -fcatch-undefined-behavior generates inefficient code  
							
							... 
							
							
							
							The basic issue is that isel (very reasonably!) expects conditional branches
to be folded, so CGP leaving around a bunch of dead computation feeding
conditional branches isn't such a good idea.  Just fold branches on constants
into unconditional branches.
llvm-svn: 123526 
							
						 
						
							2011-01-15 07:36:13 +00:00  
				
					
						
							
							
								 
						
							
								ee588defc6 
								
							 
						 
						
							
							
								
								simplify code, no functionality change.  
							
							... 
							
							
							
							llvm-svn: 123525 
							
						 
						
							2011-01-15 07:29:01 +00:00  
				
					
						
							
							
								 
						
							
								1b93be501d 
								
							 
						 
						
							
							
								
								Now that instruction optzns can update the iterator as they go, we can  
							
							... 
							
							
							
							have objectsize folding recursively simplify away their result when it
folds.  It is important to catch this here, because otherwise we won't
eliminate the cross-block values at isel and other times.
llvm-svn: 123524 
							
						 
						
							2011-01-15 07:25:29 +00:00  
				
					
						
							
							
								 
						
							
								7a2771440f 
								
							 
						 
						
							
							
								
								make the current instruction iterator an ivar, allowing xforms that  
							
							... 
							
							
							
							potentially invalidate it (like inline asm lowering) to be sunk into
their proper place, cleaning up a ton of code.
llvm-svn: 123523 
							
						 
						
							2011-01-15 07:14:54 +00:00  
				
					
						
							
							
								 
						
							
								9c10d587f6 
								
							 
						 
						
							
							
								
								implement an instcombine xform that canonicalizes casts outside of and-with-constant operations.  
							
							... 
							
							
							
							This fixes rdar://8808586 which observed that we used to compile:
union xy {
        struct x { _Bool b[15]; } x;
        __attribute__((packed))
        struct y {
                __attribute__((packed)) unsigned long b0to7;
                __attribute__((packed)) unsigned int b8to11;
                __attribute__((packed)) unsigned short b12to13;
                __attribute__((packed)) unsigned char b14;
        } y;
};
struct x
foo(union xy *xy)
{
        return xy->x;
}
into:
_foo:                                   ## @foo
	movq	(%rdi), %rax
	movabsq	$1095216660480, %rcx    ## imm = 0xFF00000000
	andq	%rax, %rcx
	movabsq	$-72057594037927936, %rdx ## imm = 0xFF00000000000000
	andq	%rax, %rdx
	movzbl	%al, %esi
	orq	%rdx, %rsi
	movq	%rax, %rdx
	andq	$65280, %rdx            ## imm = 0xFF00
	orq	%rsi, %rdx
	movq	%rax, %rsi
	andq	$16711680, %rsi         ## imm = 0xFF0000
	orq	%rdx, %rsi
	movl	%eax, %edx
	andl	$-16777216, %edx        ## imm = 0xFFFFFFFFFF000000
	orq	%rsi, %rdx
	orq	%rcx, %rdx
	movabsq	$280375465082880, %rcx  ## imm = 0xFF0000000000
	movq	%rax, %rsi
	andq	%rcx, %rsi
	orq	%rdx, %rsi
	movabsq	$71776119061217280, %r8 ## imm = 0xFF000000000000
	andq	%r8, %rax
	orq	%rsi, %rax
	movzwl	12(%rdi), %edx
	movzbl	14(%rdi), %esi
	shlq	$16, %rsi
	orl	%edx, %esi
	movq	%rsi, %r9
	shlq	$32, %r9
	movl	8(%rdi), %edx
	orq	%r9, %rdx
	andq	%rdx, %rcx
	movzbl	%sil, %esi
	shlq	$32, %rsi
	orq	%rcx, %rsi
	movl	%edx, %ecx
	andl	$-16777216, %ecx        ## imm = 0xFFFFFFFFFF000000
	orq	%rsi, %rcx
	movq	%rdx, %rsi
	andq	$16711680, %rsi         ## imm = 0xFF0000
	orq	%rcx, %rsi
	movq	%rdx, %rcx
	andq	$65280, %rcx            ## imm = 0xFF00
	orq	%rsi, %rcx
	movzbl	%dl, %esi
	orq	%rcx, %rsi
	andq	%r8, %rdx
	orq	%rsi, %rdx
	ret
We now compile this into:
_foo:                                   ## @foo
## BB#0:                                ## %entry
	movzwl	12(%rdi), %eax
	movzbl	14(%rdi), %ecx
	shlq	$16, %rcx
	orl	%eax, %ecx
	shlq	$32, %rcx
	movl	8(%rdi), %edx
	orq	%rcx, %rdx
	movq	(%rdi), %rax
	ret
A small improvement :-)
llvm-svn: 123520 
							
						 
						
							2011-01-15 06:32:33 +00:00  
				
					
						
							
							
								 
						
							
								e20dd530d0 
								
							 
						 
						
							
							
								
								one more instcombine variant that is needed to work with future changes,  
							
							... 
							
							
							
							no functionality change currently.
llvm-svn: 123517 
							
						 
						
							2011-01-15 05:50:18 +00:00  
				
					
						
							
							
								 
						
							
								497459d5fd 
								
							 
						 
						
							
							
								
								fix typo  
							
							... 
							
							
							
							llvm-svn: 123516 
							
						 
						
							2011-01-15 05:42:47 +00:00  
				
					
						
							
							
								 
						
							
								f3c4eefff8 
								
							 
						 
						
							
							
								
								Catch ~x < cst  just like ~x < ~y, we currently handle this through  
							
							... 
							
							
							
							means that are about to disappear.
llvm-svn: 123515 
							
						 
						
							2011-01-15 05:41:33 +00:00  
				
					
						
							
							
								 
						
							
								311aa63c87 
								
							 
						 
						
							
							
								
								reduce indentation  
							
							... 
							
							
							
							llvm-svn: 123514 
							
						 
						
							2011-01-15 05:40:29 +00:00  
				
					
						
							
							
								 
						
							
								b68ec5c339 
								
							 
						 
						
							
							
								
								Generalize LoadAndStorePromoter a bit and switch LICM  
							
							... 
							
							
							
							to use it.
llvm-svn: 123501 
							
						 
						
							2011-01-15 00:12:35 +00:00  
				
					
						
							
							
								 
						
							
								3e2f6cf7ae 
								
							 
						 
						
							
							
								
								Fix a false-positive warning.  
							
							... 
							
							
							
							llvm-svn: 123480 
							
						 
						
							2011-01-14 22:31:13 +00:00  
				
					
						
							
							
								 
						
							
								9eb7cb48e4 
								
							 
						 
						
							
							
								
								Enhance GlobalOpt to be able to evaluate initializers that involve stores through  
							
							... 
							
							
							
							bitcasts, at least in simple cases.  This fixes clang's CodeGenCXX/virtual-base-dtor.cpp
llvm-svn: 123477 
							
						 
						
							2011-01-14 22:19:20 +00:00  
				
					
						
							
							
								 
						
							
								b498f9aff3 
								
							 
						 
						
							
							
								
								switch SRoA to use LoadAndStorePromoter instead of its own copy of the code.  
							
							... 
							
							
							
							llvm-svn: 123457 
							
						 
						
							2011-01-14 19:50:47 +00:00  
				
					
						
							
							
								 
						
							
								95294b8796 
								
							 
						 
						
							
							
								
								Add a new LoadAndStorePromoter class, which implements the general  
							
							... 
							
							
							
							"promote a bunch of load and stores" logic, allowing the code to
be shared and reused.
llvm-svn: 123456 
							
						 
						
							2011-01-14 19:36:13 +00:00  
				
					
						
							
							
								 
						
							
								9987a6f49b 
								
							 
						 
						
							
							
								
								split SROA into two passes: one that uses DomFrontiers (-scalarrepl)  
							
							... 
							
							
							
							and one that uses SSAUpdater (-scalarrepl-ssa)
llvm-svn: 123436 
							
						 
						
							2011-01-14 08:13:00 +00:00  
				
					
						
							
							
								 
						
							
								543384efb4 
								
							 
						 
						
							
							
								
								Implement full support for promoting allocas to registers using SSAUpdater  
							
							... 
							
							
							
							instead of DomTree/DomFrontier.  This may be interesting for reducing compile 
time.  This is currently disabled, but seems to work just fine.
When this is enabled, we eliminate two runs of dominator frontier, one in the
"early per-function" optimizations and one in the "interlaced with inliner"
function passes.
llvm-svn: 123434 
							
						 
						
							2011-01-14 07:50:47 +00:00  
				
					
						
							
							
								 
						
							
								90f3a9a1c7 
								
							 
						 
						
							
							
								
								indentation  
							
							... 
							
							
							
							llvm-svn: 123426 
							
						 
						
							2011-01-14 04:23:53 +00:00  
				
					
						
							
							
								 
						
							
								7f60dc1eb0 
								
							 
						 
						
							
							
								
								Move some shift transforms out of instcombine and into InstructionSimplify.  
							
							... 
							
							
							
							While there, I noticed that the transform "undef >>a X -> undef" was wrong.
For example if X is 2 then the top two bits must be equal, so the result can
not be anything.  I fixed this in the constant folder as well.  Also, I made
the transform for "X << undef" stronger: it now folds to undef always, even
though X might be zero.  This is in accordance with the LangRef, but I must
admit that it is fairly aggressive.  Also, I added "i32 X << 32 -> undef"
following the LangRef and the constant folder, likewise fairly aggressive.
llvm-svn: 123417 
							
						 
						
							2011-01-14 00:37:45 +00:00  
				
					
						
							
							
								 
						
							
								328e91bbe1 
								
							 
						 
						
							
							
								
								Fix whitespace.  
							
							... 
							
							
							
							llvm-svn: 123396 
							
						 
						
							2011-01-13 20:59:44 +00:00  
				
					
						
							
							
								 
						
							
								c8056a952e 
								
							 
						 
						
							
							
								
								Check for empty structs, and for consistency, zero-element arrays.  
							
							... 
							
							
							
							llvm-svn: 123383 
							
						 
						
							2011-01-13 18:26:59 +00:00  
				
					
						
							
							
								 
						
							
								08713d3c5f 
								
							 
						 
						
							
							
								
								Extend SROA to handle arrays accessed as homogeneous structs and vice versa.  
							
							... 
							
							
							
							This is a minor extension of SROA to handle a special case that is
important for some ARM NEON operations.  Some of the NEON intrinsics
return multiple values, which are handled as struct types containing
multiple elements of the same vector type.  The corresponding return
types declared in the arm_neon.h header have equivalent arrays.  We
need SROA to recognize that it can split up those arrays and structs
into separate vectors, even though they are not always accessed with
the same type.  SROA already handles loads and stores of an entire
alloca by using insertvalue/extractvalue to access the individual
pieces, and that code works the same regardless of whether the type
is a struct or an array.  So, all that needs to be done is to check
for compatible arrays and homogeneous structs.
llvm-svn: 123381 
							
						 
						
							2011-01-13 17:45:11 +00:00  
				
					
						
							
							
								 
						
							
								12eec40c83 
								
							 
						 
						
							
							
								
								Make SROA more aggressive with allocas containing padding.  
							
							... 
							
							
							
							SROA only split up structs and arrays one level at a time, so padding can
only cause trouble if it is located in between the struct or array elements.
llvm-svn: 123380 
							
						 
						
							2011-01-13 17:45:08 +00:00  
				
					
						
							
							
								 
						
							
								30f3ebbc1f 
								
							 
						 
						
							
							
								
								Use SmallVector instead of SmallPtrSet and avoid non-deterministic behavior.  
							
							... 
							
							
							
							llvm-svn: 123318 
							
						 
						
							2011-01-12 19:12:45 +00:00  
				
					
						
							
							
								 
						
							
								dd5f60b7a7 
								
							 
						 
						
							
							
								
								revert 123144, reenabling the rest of memset formation.  
							
							... 
							
							
							
							llvm-svn: 123302 
							
						 
						
							2011-01-12 03:25:15 +00:00  
				
					
						
							
							
								 
						
							
								654098f411 
								
							 
						 
						
							
							
								
								revert r123146 which disabled code that wasn't the root cause  
							
							... 
							
							
							
							of the bootstrap miscompare issue.
llvm-svn: 123299 
							
						 
						
							2011-01-12 01:52:23 +00:00  
				
					
						
							
							
								 
						
							
								fa7c29d255 
								
							 
						 
						
							
							
								
								revert r123149, reenabling an improvement to memcpyopt that wasn't  
							
							... 
							
							
							
							the source of the bootstrap problem.
llvm-svn: 123298 
							
						 
						
							2011-01-12 01:43:46 +00:00  
				
					
						
							
							
								 
						
							
								12cc296bd4 
								
							 
						 
						
							
							
								
								Remove the PR8954 workaround.  
							
							... 
							
							
							
							llvm-svn: 123288 
							
						 
						
							2011-01-11 22:56:41 +00:00  
				
					
						
							
							
								 
						
							
								f2407aa98b 
								
							 
						 
						
							
							
								
								Fix a non-deterministic loop in llvm::MergeBlockIntoPredecessor.  
							
							... 
							
							
							
							DT->changeImmediateDominator() trivially ignores identity updates, so there is
really no need for the uniqueing provided by SmallPtrSet.
I expect this to fix PR8954.
llvm-svn: 123286 
							
						 
						
							2011-01-11 22:54:38 +00:00  
				
					
						
							
							
								 
						
							
								cb9c4f85ec 
								
							 
						 
						
							
							
								
								Dial back the speculative fix for PR8954 a bit, so that we only recompute dominators  
							
							... 
							
							
							
							once at the beginning of GVN instead of once per iteration.
llvm-svn: 123278 
							
						 
						
							2011-01-11 22:14:42 +00:00  
				
					
						
							
							
								 
						
							
								51eb403907 
								
							 
						 
						
							
							
								
								Attempt to fix the bootstrap buildbot. Rafael says this works for him on x86-64 Linux.  
							
							... 
							
							
							
							llvm-svn: 123270 
							
						 
						
							2011-01-11 20:23:34 +00:00  
				
					
						
							
							
								 
						
							
								0022a4b417 
								
							 
						 
						
							
							
								
								Remove dead variable, const-ref-ize an APInt.  
							
							... 
							
							
							
							llvm-svn: 123248 
							
						 
						
							2011-01-11 18:26:37 +00:00  
				
					
						
							
							
								 
						
							
								d41db8f9cb 
								
							 
						 
						
							
							
								
								this pass claims to preserve scev, make sure to tell it about deletions.  
							
							... 
							
							
							
							llvm-svn: 123247 
							
						 
						
							2011-01-11 18:14:50 +00:00  
				
					
						
							
							
								 
						
							
								8e158495f1 
								
							 
						 
						
							
							
								
								Factor the actual simplification out of SimplifyIndirectBrOnSelect and into a new helper function so it can be reused in e.g. an upcoming SimplifySwitchOnSelect.  
							
							... 
							
							
							
							No functional change.
llvm-svn: 123234 
							
						 
						
							2011-01-11 12:52:11 +00:00  
				
					
						
							
							
								 
						
							
								193ce7c4d1 
								
							 
						 
						
							
							
								
								update memdep when an instruction is deleted.  This code isn't  
							
							... 
							
							
							
							actually reached in the testcase in PR8954, but it's safe and good
practice.
llvm-svn: 123224 
							
						 
						
							2011-01-11 08:19:16 +00:00  
				
					
						
							
							
								 
						
							
								e2523b287c 
								
							 
						 
						
							
							
								
								when MergeBlockIntoPredecessor merges two blocks, update MemDep if it  
							
							... 
							
							
							
							is floating around in the ether.
llvm-svn: 123223 
							
						 
						
							2011-01-11 08:16:49 +00:00  
				
					
						
							
							
								 
						
							
								f6ae904e34 
								
							 
						 
						
							
							
								
								Fix FoldSingleEntryPHINodes to update memdep and AA when it deletes  
							
							... 
							
							
							
							phi nodes.  It is called from MergeBlockIntoPredecessor which is 
called from GVN, which claims to preserve these.
I'm skeptical that this is the actual problem behind PR8954, but
this is a stab in the right direction.
llvm-svn: 123222 
							
						 
						
							2011-01-11 08:13:40 +00:00  
				
					
						
							
							
								 
						
							
								dfcfcb49fa 
								
							 
						 
						
							
							
								
								random cleanups  
							
							... 
							
							
							
							llvm-svn: 123221 
							
						 
						
							2011-01-11 08:00:40 +00:00  
				
					
						
							
							
								 
						
							
								63fe78de68 
								
							 
						 
						
							
							
								
								remove a bogus assertion: the latch block of a loop is not  
							
							... 
							
							
							
							necessarily an uncond branch to the header.  This fixes 
PR8955 (the assertion tripping).
llvm-svn: 123219 
							
						 
						
							2011-01-11 07:47:59 +00:00  
				
					
						
							
							
								 
						
							
								d490c2d2ae 
								
							 
						 
						
							
							
								
								Fix a random missed optimization by making InstCombine more aggressive when determining which bits are demanded by  
							
							... 
							
							
							
							a comparison against a constant.
llvm-svn: 123203 
							
						 
						
							2011-01-11 00:36:45 +00:00  
				
					
						
							
							
								 
						
							
								cf414cf0a6 
								
							 
						 
						
							
							
								
								Teach instcombine about the rest of the SSE and SSE2 conversion  
							
							... 
							
							
							
							intrinsics element dependencies. Reviewed by Nick.
llvm-svn: 123161 
							
						 
						
							2011-01-10 07:19:37 +00:00  
				
					
						
							
							
								 
						
							
								88bc848ab6 
								
							 
						 
						
							
							
								
								another random stab in the dark trying to fix llvm-gcc-i386-linux-selfhost  
							
							... 
							
							
							
							llvm-svn: 123149 
							
						 
						
							2011-01-10 02:34:11 +00:00  
				
					
						
							
							
								 
						
							
								4662bd4b13 
								
							 
						 
						
							
							
								
								another (more) aggressive attempt to bring llvm-gcc-i386-linux-selfhost  
							
							... 
							
							
							
							back to life.
llvm-svn: 123146 
							
						 
						
							2011-01-10 00:47:34 +00:00  
				
					
						
							
							
								 
						
							
								1017fa6746 
								
							 
						 
						
							
							
								
								temporarily disable memset formation from memsets in an effort to restore buildbot stability.  
							
							... 
							
							
							
							llvm-svn: 123144 
							
						 
						
							2011-01-09 23:52:48 +00:00  
				
					
						
							
							
								 
						
							
								caf5c0d037 
								
							 
						 
						
							
							
								
								fix a few old bugs (found by inspection) where we would zap instructions  
							
							... 
							
							
							
							without informing memdep.  This could cause nondeterministic weirdness 
based on where instructions happen to get allocated, and will hopefully
breathe some life into some broken testers.
llvm-svn: 123124 
							
						 
						
							2011-01-09 19:26:10 +00:00  
				
					
						
							
							
								 
						
							
								cc21c4aa98 
								
							 
						 
						
							
							
								
								Instcombine: Fix pattern where the sext did not dominate the icmp using it  
							
							... 
							
							
							
							llvm-svn: 123121 
							
						 
						
							2011-01-09 16:00:11 +00:00  
				
					
						
							
							
								 
						
							
								a42e5915bf 
								
							 
						 
						
							
							
								
								LoopInstSimplify preserves LoopSimplify.  
							
							... 
							
							
							
							llvm-svn: 123117 
							
						 
						
							2011-01-09 12:35:16 +00:00  
				
					
						
							
							
								 
						
							
								a337f5ec5c 
								
							 
						 
						
							
							
								
								reduce indentation.  Print <nuw> and <nsw> when dumping SCEV AddRec's  
							
							... 
							
							
							
							that have the bit set.
llvm-svn: 123104 
							
						 
						
							2011-01-09 02:16:18 +00:00  
				
					
						
							
							
								 
						
							
								7d6433ae76 
								
							 
						 
						
							
							
								
								fix a latent bug in memcpyoptimizer that my recent patches exposed: it wasn't  
							
							... 
							
							
							
							updating memdep when fusing stores together.  This fixes the crash optimizing
the bullet benchmark.
llvm-svn: 123091 
							
						 
						
							2011-01-08 22:19:21 +00:00  
				
					
						
							
							
								 
						
							
								ff6ed2ac5f 
								
							 
						 
						
							
							
								
								tryMergingIntoMemset can only handle constant length memsets.  
							
							... 
							
							
							
							llvm-svn: 123090 
							
						 
						
							2011-01-08 22:11:56 +00:00  
				
					
						
							
							
								 
						
							
								9a1d63ba9f 
								
							 
						 
						
							
							
								
								Merge memsets followed by neighboring memsets and other stores into  
							
							... 
							
							
							
							larger memsets.  Among other things, this fixes rdar://8760394 and
allows us to handle "Example 2" from http://blog.regehr.org/archives/320 ,
compiling it into a single 4096-byte memset:
_mad_synth_mute:                        ## @mad_synth_mute
## BB#0:                                ## %entry
	pushq	%rax
	movl	$4096, %esi             ## imm = 0x1000
	callq	___bzero
	popq	%rax
	ret
llvm-svn: 123089 
							
						 
						
							2011-01-08 21:19:19 +00:00  
				
					
						
							
							
								 
						
							
								5120ebf184 
								
							 
						 
						
							
							
								
								fix an issue in IsPointerOffset that prevented us from recognizing that  
							
							... 
							
							
							
							P and P+1 are relative to the same base pointer.
llvm-svn: 123087 
							
						 
						
							2011-01-08 21:07:56 +00:00  
				
					
						
							
							
								 
						
							
								4dc1fd938f 
								
							 
						 
						
							
							
								
								enhance memcpyopt to merge a store and a subsequent  
							
							... 
							
							
							
							memset into a single larger memset.
llvm-svn: 123086 
							
						 
						
							2011-01-08 20:54:51 +00:00  
				
					
						
							
							
								 
						
							
								c638147e9f 
								
							 
						 
						
							
							
								
								constify TargetData references.  
							
							... 
							
							
							
							Split memset formation logic out into its own
"tryMergingIntoMemset" helper function.
llvm-svn: 123081 
							
						 
						
							2011-01-08 20:24:01 +00:00  
				
					
						
							
							
								 
						
							
								59c82f850d 
								
							 
						 
						
							
							
								
								When loop rotation happens, it is *very* common for the duplicated condbr  
							
							... 
							
							
							
							to be foldable into an uncond branch.  When this happens, we can make a
much simpler CFG for the loop, which is important for nested loop cases
where we want the outer loop to be aggressively optimized.
Handle this case more aggressively.  For example, previously on
phi-duplicate.ll we would get this:
define void @test(i32 %N, double* %G) nounwind ssp {
entry:
  %cmp1 = icmp slt i64 1, 1000
  br i1 %cmp1, label %bb.nph, label %for.end
bb.nph:                                           ; preds = %entry
  br label %for.body
for.body:                                         ; preds = %bb.nph, %for.cond
  %j.02 = phi i64 [ 1, %bb.nph ], [ %inc, %for.cond ]
  %arrayidx = getelementptr inbounds double* %G, i64 %j.02
  %tmp3 = load double* %arrayidx
  %sub = sub i64 %j.02, 1
  %arrayidx6 = getelementptr inbounds double* %G, i64 %sub
  %tmp7 = load double* %arrayidx6
  %add = fadd double %tmp3, %tmp7
  %arrayidx10 = getelementptr inbounds double* %G, i64 %j.02
  store double %add, double* %arrayidx10
  %inc = add nsw i64 %j.02, 1
  br label %for.cond
for.cond:                                         ; preds = %for.body
  %cmp = icmp slt i64 %inc, 1000
  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
for.cond.for.end_crit_edge:                       ; preds = %for.cond
  br label %for.end
for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry
  ret void
}
Now we get the much nicer:
define void @test(i32 %N, double* %G) nounwind ssp {
entry:
  br label %for.body
for.body:                                         ; preds = %entry, %for.body
  %j.01 = phi i64 [ 1, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds double* %G, i64 %j.01
  %tmp3 = load double* %arrayidx
  %sub = sub i64 %j.01, 1
  %arrayidx6 = getelementptr inbounds double* %G, i64 %sub
  %tmp7 = load double* %arrayidx6
  %add = fadd double %tmp3, %tmp7
  %arrayidx10 = getelementptr inbounds double* %G, i64 %j.01
  store double %add, double* %arrayidx10
  %inc = add nsw i64 %j.01, 1
  %cmp = icmp slt i64 %inc, 1000
  br i1 %cmp, label %for.body, label %for.end
for.end:                                          ; preds = %for.body
  ret void
}
With all of these recent changes, we are now able to compile:
void foo(char *X) {
 for (int i = 0; i != 100; ++i) 
   for (int j = 0; j != 100; ++j)
     X[j+i*100] = 0;
}
into a single memset of 10000 bytes.  This series of changes
should also be helpful for other nested loop scenarios.
llvm-svn: 123079 
							
						 
						
							2011-01-08 19:59:06 +00:00  
				
					
						
							
							
								 
						
							
								30f318e5d1 
								
							 
						 
						
							
							
								
								split ssa updating code out to its own helper function.  Don't bother  
							
							... 
							
							
							
							moving the OrigHeader block anymore: we just merge it away anyway so
its code layout doesn't matter.
llvm-svn: 123077 
							
						 
						
							2011-01-08 19:26:33 +00:00  
				
					
						
							
							
								 
						
							
								2615130e1d 
								
							 
						 
						
							
							
								
								Implement a TODO: Enhance loopinfo to merge away the unconditional branch  
							
							... 
							
							
							
							that it was leaving in loops after rotation (between the original latch
block and the original header).
With this change, it is possible for rotated loops to have just a single
basic block, which is useful.
llvm-svn: 123075 
							
						 
						
							2011-01-08 19:10:28 +00:00  
				
					
						
							
							
								 
						
							
								930b716e1b 
								
							 
						 
						
							
							
								
								various code cleanups, enhance MergeBlockIntoPredecessor to preserve  
							
							... 
							
							
							
							loop info.
llvm-svn: 123074 
							
						 
						
							2011-01-08 19:08:40 +00:00  
				
					
						
							
							
								 
						
							
								fee37c5fa3 
								
							 
						 
						
							
							
								
								inline preserveCanonicalLoopForm now that it is simple.  
							
							... 
							
							
							
							llvm-svn: 123073 
							
						 
						
							2011-01-08 18:55:50 +00:00  
				
					
						
							
							
								 
						
							
								063dca0f6a 
								
							 
						 
						
							
							
								
								Three major changes:  
							
							... 
							
							
							
							1. Rip out LoopRotate's domfrontier updating code.  It isn't
   needed now that LICM doesn't use DF and it is super complex
   and gross.
2. Make DomTree updating code a lot simpler and faster.  The 
   old loop over all the blocks was just to find a block??
3. Change the code that inserts the new preheader to just use
   SplitCriticalEdge instead of doing an overcomplex 
   reimplementation of it.
No behavior change, except for the name of the inserted preheader.
llvm-svn: 123072 
							
						 
						
							2011-01-08 18:52:51 +00:00  
				
					
						
							
							
								 
						
							
								30d95f9f87 
								
							 
						 
						
							
							
								
								reduce nesting.  
							
							... 
							
							
							
							llvm-svn: 123071 
							
						 
						
							2011-01-08 18:47:43 +00:00  
				
					
						
							
							
								 
						
							
								7fab23bc1d 
								
							 
						 
						
							
							
								
								LoopRotate requires canonical loop form, so it always has preheaders  
							
							... 
							
							
							
							and latch blocks.  Reorder entry conditions to make the pass faster
and more logical.
llvm-svn: 123069 
							
						 
						
							2011-01-08 18:06:22 +00:00  
				
					
						
							
							
								 
						
							
								d62691f4e8 
								
							 
						 
						
							
							
								
								use the LI ivar.  
							
							... 
							
							
							
							llvm-svn: 123068 
							
						 
						
							2011-01-08 17:49:51 +00:00  
				
					
						
							
							
								 
						
							
								385f2ec6d8 
								
							 
						 
						
							
							
								
								some cleanups: remove dead arguments and eliminate ivars  
							
							... 
							
							
							
							that are just passed to one function.
llvm-svn: 123067 
							
						 
						
							2011-01-08 17:48:33 +00:00  
				
					
						
							
							
								 
						
							
								25ba40a0cc 
								
							 
						 
						
							
							
								
								fix an issue duncan pointed out, which could cause loop rotate  
							
							... 
							
							
							
							to violate LCSSA form
llvm-svn: 123066 
							
						 
						
							2011-01-08 17:38:45 +00:00  
				
					
						
							
							
								 
						
							
								b4ab257bcc 
								
							 
						 
						
							
							
								
								Fix coding style issues.  
							
							... 
							
							
							
							llvm-svn: 123065 
							
						 
						
							2011-01-08 17:07:11 +00:00  
				
					
						
							
							
								 
						
							
								84986b298a 
								
							 
						 
						
							
							
								
								Make more passes preserve dominators (or state that they preserve dominators if  
							
							... 
							
							
							
							they already do). This removes two dominator recomputations prior to isel,
which is a 1% improvement in total llc time for 403.gcc.
The only potentially suspect thing is making GCStrategy recompute dominators if
it used a custom lowering strategy.
llvm-svn: 123064 
							
						 
						
							2011-01-08 17:01:52 +00:00  
				
					
						
							
							
								 
						
							
								80bd9af7c5 
								
							 
						 
						
							
							
								
								Contract subloop bodies. However, it is still important to visit the phis at the  
							
							... 
							
							
							
							top of subloop headers, as the phi uses logically occur outside of the subloop.
llvm-svn: 123062 
							
						 
						
							2011-01-08 15:52:22 +00:00  
				
					
						
							
							
								 
						
							
								6a1fb8f235 
								
							 
						 
						
							
							
								
								Fix a bug in r123034 (trying to sext/zext non-integers) and clean up a little.  
							
							... 
							
							
							
							llvm-svn: 123061 
							
						 
						
							2011-01-08 10:51:36 +00:00  
				
					
						
							
							
								 
						
							
								8c5defd0b0 
								
							 
						 
						
							
							
								
								Have loop-rotate simplify instructions (yay instsimplify!) as it clones  
							
							... 
							
							
							
							them into the loop preheader, eliminating silly instructions like
"icmp i32 0, 100" in fixed tripcount loops.  This also better exposes the 
bigger problem with loop rotate that I'd like to fix: once this has been
folded, the duplicated conditional branch *often* turns into an uncond branch.
Not aggressively handling this is pessimizing later loop optimizations 
somethin' fierce by making "dominates all exit blocks" checks fail.
llvm-svn: 123060 
							
						 
						
							2011-01-08 08:24:46 +00:00  
				
					
						
							
							
								 
						
							
								43f8d16482 
								
							 
						 
						
							
							
								
								Revamp the ValueMapper interfaces in a couple ways:  
							
							... 
							
							
							
							1. Take a flags argument instead of a bool.  This makes
   it more clear to the reader what it is used for.
2. Add a flag that says that "remapping a value not in the
   map is ok".
3. Reimplement MapValue to share a bunch of code and be a lot
   more efficient.  For lookup failures, don't drop null values
   into the map.
4. Using the new flag a bunch of code can vaporize in LinkModules
   and LoopUnswitch, kill it.
No functionality change.
llvm-svn: 123058 
							
						 
						
							2011-01-08 08:15:20 +00:00  
				
					
						
							
							
								 
						
							
								2b3f20e6ec 
								
							 
						 
						
							
							
								
								two minor changes: switch to the standard ValueToValueMapTy  
							
							... 
							
							
							
							map from ValueMapper.h (giving us access to its utilities)
and add a fastpath in the loop rotation code, avoiding expensive
ssa updator manipulation for values with nothing to update.
llvm-svn: 123057 
							
						 
						
							2011-01-08 07:21:31 +00:00  
				
					
						
							
							
								 
						
							
								fc3d7f664b 
								
							 
						 
						
							
							
								
								InstCombine: Match min/max hidden by sext/zext  
							
							... 
							
							
							
							X = sext x; x >s c ? X : C+1 --> X = sext x; X <s C+1 ? C+1 : X
X = sext x; x <s c ? X : C-1 --> X = sext x; X >s C-1 ? C-1 : X
X = zext x; x >u c ? X : C+1 --> X = zext x; X <u C+1 ? C+1 : X
X = zext x; x <u c ? X : C-1 --> X = zext x; X >u C-1 ? C-1 : X
X = sext x; x >u c ? X : C+1 --> X = sext x; X <u C+1 ? C+1 : X
X = sext x; x <u c ? X : C-1 --> X = sext x; X >u C-1 ? C-1 : X
Instead of calculating this with mixed types promote all to the
larger type. This enables scalar evolution to analyze this
expression. PR8866
llvm-svn: 123034 
							
						 
						
							2011-01-07 21:33:14 +00:00  
				
					
						
							
							
								 
						
							
								411e6eedff 
								
							 
						 
						
							
							
								
								Some whitespace fixes  
							
							... 
							
							
							
							llvm-svn: 123033 
							
						 
						
							2011-01-07 21:33:13 +00:00  
				
					
						
							
							
								 
						
							
								134cde912a 
								
							 
						 
						
							
							
								
								Revert 122959, it needs more thought. Add it back to README.txt with additional notes.  
							
							... 
							
							
							
							llvm-svn: 123030 
							
						 
						
							2011-01-07 20:42:20 +00:00  
				
					
						
							
							
								 
						
							
								89afb43b1e 
								
							 
						 
						
							
							
								
								Remove all uses of the "ugly" method BranchInst::setUnconditionalDest().  
							
							... 
							
							
							
							llvm-svn: 123025 
							
						 
						
							2011-01-07 20:25:56 +00:00  
				
					
						
							
							
								 
						
							
								ae67cc13a9 
								
							 
						 
						
							
							
								
								InstCombine: Turn _chk functions into the "unsafe" variant if length and max length are equal.  
							
							... 
							
							
							
							This happens when we take the (non-constant) length from a malloc.
llvm-svn: 122961 
							
						 
						
							2011-01-06 14:22:52 +00:00  
				
					
						
							
							
								 
						
							
								799b011276 
								
							 
						 
						
							
							
								
								InstCombine: If we call llvm.objectsize on a malloc call we can replace it with the size passed to malloc.  
							
							... 
							
							
							
							llvm-svn: 122959 
							
						 
						
							2011-01-06 13:11:05 +00:00  
				
					
						
							
							
								 
						
							
								a76cc117e0 
								
							 
						 
						
							
							
								
								InstCombine: Teach llvm.objectsize folding to look through GEPs.  
							
							... 
							
							
							
							llvm-svn: 122958 
							
						 
						
							2011-01-06 13:07:49 +00:00  
				
					
						
							
							
								 
						
							
								9ec19ea06a 
								
							 
						 
						
							
							
								
								Add the CallInst optimizations that don't involve expanding inline assembly to  
							
							... 
							
							
							
							OptimizeInst() so that they can be used on a worklist instruction.
llvm-svn: 122945 
							
						 
						
							2011-01-06 02:56:42 +00:00  
				
					
						
							
							
								 
						
							
								d28c78eb4f 
								
							 
						 
						
							
							
								
								Move the GEP handling in CodeGenPrepare to OptimizeInst().  
							
							... 
							
							
							
							llvm-svn: 122944 
							
						 
						
							2011-01-06 02:44:52 +00:00  
				
					
						
							
							
								 
						
							
								14ac865ca9 
								
							 
						 
						
							
							
								
								Split the optimizations in CodeGenPrepare that don't manipulate the iterators  
							
							... 
							
							
							
							into a separate function, so that it can be called from a loop using a worklist
rather than a loop traversing a whole basic block.
llvm-svn: 122943 
							
						 
						
							2011-01-06 02:37:26 +00:00  
				
					
						
							
							
								 
						
							
								70be93a200 
								
							 
						 
						
							
							
								
								Zap the last two -Wself-assign warnings in llvm.  
							
							... 
							
							
							
							Simplify RALinScan::DowngradeRegister with TRI::getOverlaps while we are there.
llvm-svn: 122940 
							
						 
						
							2011-01-06 01:33:22 +00:00  
				
					
						
							
							
								 
						
							
								ce3b930a98 
								
							 
						 
						
							
							
								
								Stop reallocating SunkAddrs for each basic block. When we move to an instruction  
							
							... 
							
							
							
							worklist, the key will need to become std::pair<BasicBlock*, Value*>.
llvm-svn: 122932 
							
						 
						
							2011-01-06 00:42:50 +00:00  
				
					
						
							
							
								 
						
							
								b62ccb241b 
								
							 
						 
						
							
							
								
								Add some more statistics to CodeGenPrepare.  
							
							... 
							
							
							
							llvm-svn: 122891 
							
						 
						
							2011-01-05 17:47:38 +00:00  
				
					
						
							
							
								 
						
							
								ced753fadf 
								
							 
						 
						
							
							
								
								Add some stats to CodeGenPrepare to make it easier to speed it up without  
							
							... 
							
							
							
							regressing code quality.
llvm-svn: 122887 
							
						 
						
							2011-01-05 17:27:27 +00:00  
				
					
						
							
							
								 
						
							
								6a78995369 
								
							 
						 
						
							
							
								
								Use pop_back_val instead of back followed by pop_back.  
							
							... 
							
							
							
							llvm-svn: 122876 
							
						 
						
							2011-01-05 16:08:47 +00:00  
				
					
						
							
							
								 
						
							
								5a2bb998ac 
								
							 
						 
						
							
							
								
								Use a worklist for later iterations just like ordinary instsimplify. The next  
							
							... 
							
							
							
							step is to only process instructions in subloops if they have been modified by
an earlier simplification.
llvm-svn: 122869 
							
						 
						
							2011-01-05 05:47:47 +00:00  
				
					
						
							
							
								 
						
							
								4c51d122d5 
								
							 
						 
						
							
							
								
								Change LoopInstSimplify back to a LoopPass. It revisits subloops rather than  
							
							... 
							
							
							
							skipping them, but it should probably use a worklist and only revisit those
instructions in subloops that have actually changed. It should probably also
use a worklist after the first iteration like instsimplify now does. Regardless,
it's only 0.3% of opt -O2 time on 403.gcc if it replaces the instcombine placed
in the middle of the loop passes.
llvm-svn: 122868 
							
						 
						
							2011-01-05 05:15:53 +00:00  
				
					
						
							
							
								 
						
							
								7b25ff04bd 
								
							 
						 
						
							
							
								
								Don't bother value numbering instructions with void types in GVN. In theory this should allow us to insert  
							
							... 
							
							
							
							fewer things into the value numbering maps, but any speedup is beneath the noise threshold on my machine
on 403.gcc.
llvm-svn: 122844 
							
						 
						
							2011-01-04 22:15:21 +00:00  
				
					
						
							
							
								 
						
							
								e39cb57b09 
								
							 
						 
						
							
							
								
								Complete the NumberTable --> LeaderTable rename.  
							
							... 
							
							
							
							llvm-svn: 122828 
							
						 
						
							2011-01-04 19:29:46 +00:00  
				
					
						
							
							
								 
						
							
								d7d06d3aaf 
								
							 
						 
						
							
							
								
								Fix typo in a comment.  
							
							... 
							
							
							
							llvm-svn: 122827 
							
						 
						
							2011-01-04 19:25:18 +00:00  
				
					
						
							
							
								 
						
							
								51489b3b28 
								
							 
						 
						
							
							
								
								Prune #include's.  
							
							... 
							
							
							
							llvm-svn: 122826 
							
						 
						
							2011-01-04 19:24:57 +00:00  
				
					
						
							
							
								 
						
							
								c7c3bc63f7 
								
							 
						 
						
							
							
								
								Clarify terminology, settling on referring to what was the "number table" as the "leader table", and  
							
							... 
							
							
							
							rename methods to make it much more clear what they're doing.
llvm-svn: 122823 
							
						 
						
							2011-01-04 19:13:25 +00:00  
				
					
						
							
							
								 
						
							
								83546f2fe0 
								
							 
						 
						
							
							
								
								When removing a value from GVN's leaders list, don't drop the Next pointer in a corner case.  
							
							... 
							
							
							
							llvm-svn: 122822 
							
						 
						
							2011-01-04 19:10:54 +00:00