Jim Laskey
dcf983ce41
Reduce the workload by not adding chain users to work list.
...
llvm-svn: 30948
2006-10-13 23:32:28 +00:00
Evan Cheng
ab51cf2e78
Merge ISD::TRUNCSTORE to ISD::STORE. Switch to using StoreSDNode.
...
llvm-svn: 30945
2006-10-13 21:14:26 +00:00
Chris Lattner
d0620d2773
Lower X%C into X/C+stuff. This allows the 'division by a constant' logic to
...
apply to rems as well as divs. This fixes PR945 and speeds up ReedSolomon
from 14.57s to 10.90s (which is now faster than gcc).
It compiles CodeGen/X86/rem.ll into:
_test1:
subl $4, %esp
movl %esi, (%esp)
movl $2155905153, %ecx
movl 8(%esp), %esi
movl %esi, %eax
imull %ecx
addl %esi, %edx
movl %edx, %eax
shrl $31, %eax
sarl $7, %edx
addl %eax, %edx
imull $255, %edx, %eax
subl %eax, %esi
movl %esi, %eax
movl (%esp), %esi
addl $4, %esp
ret
_test2:
movl 4(%esp), %eax
movl %eax, %ecx
sarl $31, %ecx
shrl $24, %ecx
addl %eax, %ecx
andl $4294967040, %ecx
subl %ecx, %eax
ret
_test3:
subl $4, %esp
movl %esi, (%esp)
movl $2155905153, %ecx
movl 8(%esp), %esi
movl %esi, %eax
mull %ecx
shrl $7, %edx
imull $255, %edx, %eax
subl %eax, %esi
movl %esi, %eax
movl (%esp), %esi
addl $4, %esp
ret
instead of div/idiv instructions.
llvm-svn: 30920
2006-10-12 20:58:32 +00:00
Chris Lattner
2e33fb453b
add a minor dag combine noticed when looking at PR945
...
llvm-svn: 30915
2006-10-12 20:23:19 +00:00
Jim Laskey
df2ccc395e
D'oh - need to use the rigth kind of store.
...
llvm-svn: 30903
2006-10-12 15:22:24 +00:00
Jim Laskey
a13b9c7aa4
Alias analysis of TRUNCSTORE.
...
llvm-svn: 30889
2006-10-11 18:55:16 +00:00
Jim Laskey
0f7c328ae7
Handle aliasing of loadext.
...
llvm-svn: 30883
2006-10-11 17:47:52 +00:00
Jim Laskey
08edf332ed
Fix regression in combiner alias analysis.
...
llvm-svn: 30880
2006-10-11 13:47:09 +00:00
Evan Cheng
d35734bd1f
Naming consistency.
...
llvm-svn: 30878
2006-10-11 07:10:22 +00:00
Evan Cheng
e71fe34d75
Reflects ISD::LOAD / ISD::LOADX / LoadSDNode changes.
...
llvm-svn: 30844
2006-10-09 20:57:25 +00:00
Chris Lattner
5ab6d8b3fc
Eliminate more token factors by taking advantage of transitivity:
...
if TF depends on A and B, and A depends on B, TF just needs to depend on
A. With Jim's alias-analysis stuff enabled, this compiles the testcase in
PR892 into:
__Z4test3Val:
subl $44, %esp
call L__Z3foov$stub
movl %edx, 28(%esp)
movl %eax, 32(%esp)
movl %eax, 24(%esp)
movl %edx, 36(%esp)
movl 52(%esp), %ecx
movl %ecx, 4(%esp)
movl %eax, 8(%esp)
movl %edx, 12(%esp)
movl 48(%esp), %eax
movl %eax, (%esp)
call L__Z3bar3ValS_$stub
addl $44, %esp
ret
instead of:
__Z4test3Val:
subl $44, %esp
call L__Z3foov$stub
movl %eax, 24(%esp)
movl %edx, 28(%esp)
movl 24(%esp), %eax
movl %eax, 32(%esp)
movl 28(%esp), %eax
movl %eax, 36(%esp)
movl 32(%esp), %eax
movl 36(%esp), %ecx
movl 52(%esp), %edx
movl %edx, 4(%esp)
movl %eax, 8(%esp)
movl %ecx, 12(%esp)
movl 48(%esp), %eax
movl %eax, (%esp)
call L__Z3bar3ValS_$stub
addl $44, %esp
ret
llvm-svn: 30821
2006-10-08 22:57:01 +00:00
Jim Laskey
0463e08005
Combiner alias analysis passes Multisource (release-asserts.)
...
llvm-svn: 30818
2006-10-07 23:37:56 +00:00
Evan Cheng
df9ac47e5e
Make use of getStore().
...
llvm-svn: 30759
2006-10-05 23:01:46 +00:00
Jim Laskey
6549d22ef9
Alias analysis code clean ups.
...
llvm-svn: 30753
2006-10-05 15:07:25 +00:00
Jim Laskey
708d0db2d8
More extensive alias analysis.
...
llvm-svn: 30721
2006-10-04 16:53:27 +00:00
Evan Cheng
5d9fd977d3
Combine ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD into ISD::LOADX. Add an
...
extra operand to LOADX to specify the exact value extension type.
llvm-svn: 30714
2006-10-04 00:56:09 +00:00
Jim Laskey
60832693a7
Load chain check is not needed
...
llvm-svn: 30613
2006-09-26 17:44:58 +00:00
Jim Laskey
dde51671e5
Chain can be any operand
...
llvm-svn: 30611
2006-09-26 09:32:41 +00:00
Jim Laskey
5f3e0af9d0
Wrong size for load
...
llvm-svn: 30610
2006-09-26 08:14:06 +00:00
Jim Laskey
b4a864d533
Can't move a load node if it's chain is not used.
...
llvm-svn: 30609
2006-09-26 07:37:42 +00:00
Jim Laskey
7aa0638aa9
Accidental enable of bad code
...
llvm-svn: 30601
2006-09-25 21:11:32 +00:00
Jim Laskey
b5534e5c28
Fix chain dropping in load and drop unused stores in ret blocks.
...
llvm-svn: 30600
2006-09-25 19:32:58 +00:00
Jim Laskey
d07be232ba
Core antialiasing for load and store.
...
llvm-svn: 30597
2006-09-25 16:29:54 +00:00
Evan Cheng
449a0c7e33
Make it work for DAG combine of multi-value nodes.
...
llvm-svn: 30573
2006-09-21 19:04:05 +00:00
Jim Laskey
35f7eebb49
core corrections
...
llvm-svn: 30570
2006-09-21 17:35:47 +00:00
Jim Laskey
5d19d59017
Basic "in frame" alias analysis.
...
llvm-svn: 30568
2006-09-21 16:28:59 +00:00
Chris Lattner
082db3f9aa
fold (aext (and (trunc x), cst)) -> (and x, cst).
...
llvm-svn: 30561
2006-09-21 06:40:43 +00:00
Chris Lattner
fa9f92cf65
Check the right value type. This fixes 186.crafty on x86
...
llvm-svn: 30560
2006-09-21 06:17:39 +00:00
Chris Lattner
8d8a3bf9c9
Compile:
...
int %test(ulong *%tmp) {
%tmp = load ulong* %tmp ; <ulong> [#uses=1]
%tmp.mask = shr ulong %tmp, ubyte 50 ; <ulong> [#uses=1]
%tmp.mask = cast ulong %tmp.mask to ubyte
%tmp2 = and ubyte %tmp.mask, 3 ; <ubyte> [#uses=1]
%tmp2 = cast ubyte %tmp2 to int ; <int> [#uses=1]
ret int %tmp2
}
to:
_test:
movl 4(%esp), %eax
movl 4(%eax), %eax
shrl $18, %eax
andl $3, %eax
ret
instead of:
_test:
movl 4(%esp), %eax
movl 4(%eax), %eax
shrl $18, %eax
# TRUNCATE movb %al, %al
andb $3, %al
movzbl %al, %eax
ret
llvm-svn: 30558
2006-09-21 06:14:31 +00:00
Chris Lattner
a31f0a622b
Generalize (zext (truncate x)) and (sext (truncate x)) folding to work when
...
the src/dst are not the same size. This catches things like "truncate
32-bit X to 8 bits, then zext to 16", which happens a bit on X86.
llvm-svn: 30557
2006-09-21 06:00:20 +00:00
Chris Lattner
c8cd62d381
Compile:
...
int test3(int a, int b) { return (a < 0) ? a : 0; }
to:
_test3:
srawi r2, r3, 31
and r3, r2, r3
blr
instead of:
_test3:
cmpwi cr0, r3, 1
li r2, 0
blt cr0, LBB2_2 ;entry
LBB2_1: ;entry
mr r3, r2
LBB2_2: ;entry
blr
This implements: PowerPC/select_lt0.ll:seli32_a_a
llvm-svn: 30517
2006-09-20 06:41:35 +00:00
Chris Lattner
8746e2cd57
Fold the full generality of (any_extend (truncate x))
...
llvm-svn: 30514
2006-09-20 06:29:17 +00:00
Chris Lattner
8b68decb27
Two things:
...
1. teach SimplifySetCC that '(srl (ctlz x), 5) == 0' is really x != 0.
2. Teach visitSELECT_CC to use SimplifySetCC instead of calling it and
ignoring the result. This allows us to compile:
bool %test(ulong %x) {
%tmp = setlt ulong %x, 4294967296
ret bool %tmp
}
to:
_test:
cntlzw r2, r3
cmplwi cr0, r3, 1
srwi r2, r2, 5
li r3, 0
beq cr0, LBB1_2 ;
LBB1_1: ;
mr r3, r2
LBB1_2: ;
blr
instead of:
_test:
addi r2, r3, -1
cntlzw r2, r2
cntlzw r3, r3
srwi r2, r2, 5
cmplwi cr0, r2, 0
srwi r2, r3, 5
li r3, 0
bne cr0, LBB1_2 ;
LBB1_1: ;
mr r3, r2
LBB1_2: ;
blr
This isn't wonderful, but it's an improvement.
llvm-svn: 30513
2006-09-20 06:19:26 +00:00
Chris Lattner
46d710e6ea
Fold (X & C1) | (Y & C2) -> (X|Y) & C3 when possible.
...
This implements CodeGen/X86/and-or-fold.ll
llvm-svn: 30379
2006-09-14 21:11:37 +00:00
Chris Lattner
97614c86ce
Split rotate matching code out to its own function. Make it stronger, by
...
matching things like ((x >> c1) & c2) | ((x << c3) & c4) to (rot x, c5) & c6
llvm-svn: 30376
2006-09-14 20:50:57 +00:00
Evan Cheng
31305c45da
DAG combiner fix for rotates. Previously the outer-most condition checks
...
for ROTL availability. This prevents it from forming ROTR for targets that
has ROTR only.
llvm-svn: 29997
2006-08-31 07:41:12 +00:00
Evan Cheng
e5570a4c3f
Move isCommutativeBinOp from SelectionDAG.cpp and DAGCombiner.cpp out. Make it a static method of SelectionDAG.
...
llvm-svn: 29951
2006-08-29 06:42:35 +00:00
Chris Lattner
3d27be1333
s|llvm/Support/Visibility.h|llvm/Support/Compiler.h|
...
llvm-svn: 29911
2006-08-27 12:54:02 +00:00
Chris Lattner
6f22ebd8be
change internal impl of dag combiner so that calls to CombineTo never have to
...
make a temporary vector.
llvm-svn: 29618
2006-08-11 17:56:38 +00:00
Chris Lattner
a2f4086828
Change one ReplaceAllUsesWith method to take an array of operands to replace
...
instead of a vector of operands.
llvm-svn: 29616
2006-08-11 17:46:28 +00:00
Chris Lattner
c24a1d3093
Start eliminating temporary vectors used to create DAG nodes. Instead, pass
...
in the start of an array and a count of operands where applicable. In many
cases, the number of operands is known, so this static array can be allocated
on the stack, avoiding the heap. In many other cases, a SmallVector can be
used, which has the same benefit in the common cases.
I updated a lot of code calling getNode that takes a vector, but ran out of
time. The rest of the code should be updated, and these methods should be
removed.
We should also do the same thing to eliminate the methods that take a
vector of MVT::ValueTypes.
It would be extra nice to convert the dagiselemitter to avoid creating vectors
for operands when calling getTargetNode.
llvm-svn: 29566
2006-08-08 02:23:42 +00:00
Reid Spencer
658b9476f0
Initialize some variables the compiler warns about.
...
llvm-svn: 29277
2006-07-25 20:44:41 +00:00
Evan Cheng
7c970b98d0
If a shuffle is a splat, check if the argument is a build_vector with all elements being the same. If so, return the argument.
...
llvm-svn: 29242
2006-07-21 08:25:53 +00:00
Evan Cheng
8472e0c4af
If a shuffle is unary, i.e. one of the vector argument is not needed, turn the
...
operand into a undef and adjust mask accordingly.
llvm-svn: 29232
2006-07-20 22:44:41 +00:00
Andrew Lenharth
ec104a2b41
80 cols
...
llvm-svn: 29221
2006-07-20 17:43:27 +00:00
Andrew Lenharth
c496b418b5
Reduce number of exported symbols
...
llvm-svn: 29220
2006-07-20 17:28:38 +00:00
Chris Lattner
54a34cd20b
Mark these two classes as hidden, shrinking libllbmgcc.dylib by 25K
...
llvm-svn: 28970
2006-06-28 21:58:30 +00:00
Andrew Lenharth
0e57b2cb92
Start on my todo list
...
llvm-svn: 28752
2006-06-12 16:07:18 +00:00
Evan Cheng
64d2846017
visitVBinOp: Can't fold divide by zero!
...
llvm-svn: 28584
2006-05-31 06:08:35 +00:00
Chris Lattner
8f872d2091
Fix a nasty dag combiner bug that caused nondeterminstic crashes (MY FAVORITE!):
...
SimplifySelectOps would eliminate a Select, delete it, then return true.
The clients would see that it did something and return null.
The top level would see a null return, and decide that nothing happened,
proceeding to process the node in other ways: boom.
The fix is simple: clients of SimplifySelectOps should return the select
node itself.
In order to catch really obnoxious boogs like this in the future, add an
assert that nodes are not deleted. We do this by checking for a sentry node
type that the SDNode dtor sets when a node is destroyed.
llvm-svn: 28514
2006-05-27 00:43:02 +00:00
Andrew Lenharth
1dc9ec5874
Move this code to a common place
...
llvm-svn: 28329
2006-05-16 17:42:15 +00:00
Chris Lattner
afe72481f6
Comment out dead variables
...
llvm-svn: 28252
2006-05-12 17:57:54 +00:00
Chris Lattner
66adee93aa
Two simplifications for token factor nodes: simplify tf(x,x) -> x.
...
simplify tf(x,y,y,z) -> tf(x,y,z).
llvm-svn: 28233
2006-05-12 05:01:37 +00:00
Evan Cheng
2c74848af1
Debugging info
...
llvm-svn: 28200
2006-05-09 06:55:15 +00:00
Chris Lattner
446e1ef26a
Make the case I just checked in stronger. Now we compile this:
...
short test2(short X, short x) {
int Y = (short)(X+x);
return Y >> 1;
}
to:
_test2:
add r2, r3, r4
extsh r2, r2
srawi r3, r2, 1
blr
instead of:
_test2:
add r2, r3, r4
extsh r2, r2
srwi r2, r2, 1
extsh r3, r2
blr
llvm-svn: 28175
2006-05-08 21:18:59 +00:00
Chris Lattner
29062da0ac
Implement and_sext.ll:test3, generating:
...
_test4:
srawi r3, r3, 16
blr
instead of:
_test4:
srwi r2, r3, 16
extsh r3, r2
blr
for:
short test4(unsigned X) {
return (X >> 16);
}
llvm-svn: 28174
2006-05-08 20:59:41 +00:00
Chris Lattner
2935d8190c
Compile this:
...
short test4(unsigned X) {
return (X >> 16);
}
to:
_test4:
movl 4(%esp), %eax
sarl $16, %eax
ret
instead of:
_test4:
movl $-65536, %eax
andl 4(%esp), %eax
sarl $16, %eax
ret
llvm-svn: 28171
2006-05-08 20:51:54 +00:00
Nate Begeman
e5ce5bb6da
Fix PR772
...
llvm-svn: 28161
2006-05-08 01:35:01 +00:00
Chris Lattner
7e7bcf3a54
Simplify some code, add a couple minor missed folds
...
llvm-svn: 28152
2006-05-06 23:06:26 +00:00
Chris Lattner
2a4d7b845b
remove cases handled elsewhere
...
llvm-svn: 28150
2006-05-06 22:43:44 +00:00
Chris Lattner
1ecb2a2dac
Use the new TargetLowering::ComputeNumSignBits method to eliminate
...
sign_extend_inreg operations. Though ComputeNumSignBits is still rudimentary,
this is enough to compile this:
short test(short X, short x) {
int Y = X+x;
return (Y >> 1);
}
short test2(short X, short x) {
int Y = (short)(X+x);
return Y >> 1;
}
into:
_test:
add r2, r3, r4
srawi r3, r2, 1
blr
_test2:
add r2, r3, r4
extsh r2, r2
srawi r3, r2, 1
blr
instead of:
_test:
add r2, r3, r4
srawi r2, r2, 1
extsh r3, r2
blr
_test2:
add r2, r3, r4
extsh r2, r2
srawi r2, r2, 1
extsh r3, r2
blr
llvm-svn: 28146
2006-05-06 09:30:03 +00:00
Chris Lattner
907e392dba
Fold trunc(any_ext). This gives stuff like:
...
27,28c27
< movzwl %di, %edi
< movl %edi, %ebx
---
> movw %di, %bx
llvm-svn: 28137
2006-05-05 22:56:26 +00:00
Chris Lattner
57f8c5a387
Shrink shifts when possible.
...
llvm-svn: 28136
2006-05-05 22:53:17 +00:00
Chris Lattner
3d26577396
Fold (fpext (load x)) -> (extload x)
...
llvm-svn: 28130
2006-05-05 21:34:35 +00:00
Chris Lattner
25a5283a86
Fold some common code.
...
llvm-svn: 28124
2006-05-05 06:32:04 +00:00
Chris Lattner
002ee91457
Implement:
...
// fold (and (sext x), (sext y)) -> (sext (and x, y))
// fold (or (sext x), (sext y)) -> (sext (or x, y))
// fold (xor (sext x), (sext y)) -> (sext (xor x, y))
// fold (and (aext x), (aext y)) -> (aext (and x, y))
// fold (or (aext x), (aext y)) -> (aext (or x, y))
// fold (xor (aext x), (aext y)) -> (aext (xor x, y))
llvm-svn: 28123
2006-05-05 06:31:05 +00:00
Chris Lattner
5ac4293606
Pull and through and/or/xor. This compiles some bitfield code to:
...
mov EAX, DWORD PTR [ESP + 4]
mov ECX, DWORD PTR [EAX]
mov EDX, ECX
add EDX, EDX
or EDX, ECX
and EDX, -2147483648
and ECX, 2147483647
or EDX, ECX
mov DWORD PTR [EAX], EDX
ret
instead of:
sub ESP, 4
mov DWORD PTR [ESP], ESI
mov EAX, DWORD PTR [ESP + 8]
mov ECX, DWORD PTR [EAX]
mov EDX, ECX
add EDX, EDX
mov ESI, ECX
and ESI, -2147483648
and EDX, -2147483648
or EDX, ESI
and ECX, 2147483647
or EDX, ECX
mov DWORD PTR [EAX], EDX
mov ESI, DWORD PTR [ESP]
add ESP, 4
ret
llvm-svn: 28122
2006-05-05 06:10:43 +00:00
Chris Lattner
812646aa0c
Implement a variety of simplifications for ANY_EXTEND.
...
llvm-svn: 28121
2006-05-05 05:58:59 +00:00
Chris Lattner
8d6fc20181
Factor some code, add these transformations:
...
// fold (and (trunc x), (trunc y)) -> (trunc (and x, y))
// fold (or (trunc x), (trunc y)) -> (trunc (or x, y))
// fold (xor (trunc x), (trunc y)) -> (trunc (xor x, y))
llvm-svn: 28120
2006-05-05 05:51:50 +00:00
Chris Lattner
2b48a94413
Remove a bogus transformation. This fixes SingleSource/UnitTests/2006-01-23-InitializedBitField.c
...
with some changes I have to the new CFE.
llvm-svn: 28022
2006-04-28 23:33:20 +00:00
Chris Lattner
662e940f73
Fix a couple more memory issues
...
llvm-svn: 27930
2006-04-21 15:32:26 +00:00
Chris Lattner
cc47ab3305
Fix a really subtle and obnoxious memory bug that caused issues with an
...
llvm-gcc4 boostrap. Whenever a node is deleted by the dag combiner, it
*must* be returned by the visit function, or the dag combiner will not
know that the node has been processed (and will, e.g., send it to the
target dag combine xforms).
llvm-svn: 27922
2006-04-20 23:55:59 +00:00
Evan Cheng
a320abc494
Turn a VAND into a VECTOR_SHUFFLE is applicable.
...
DAG combiner can turn a VAND V, <-1, 0, -1, -1>, i.e. vector clear elements,
into a vector shuffle with a zero vector. It only does so when TLI tells it
the xform is profitable.
llvm-svn: 27874
2006-04-20 08:56:16 +00:00
Chris Lattner
e1401e3610
Canonicalize vvector_shuffle(x,x) -> vvector_shuffle(x,undef) to enable patterns
...
to match again :)
llvm-svn: 27533
2006-04-08 05:34:25 +00:00
Chris Lattner
098c01e94e
Codegen shufflevector as VVECTOR_SHUFFLE
...
llvm-svn: 27529
2006-04-08 04:15:24 +00:00
Evan Cheng
613996c55e
1. If both vector operands of a vector_shuffle are undef, turn it into an undef.
...
2. A shuffle mask element can also be an undef.
llvm-svn: 27472
2006-04-06 23:20:43 +00:00
Chris Lattner
4ea52cac01
Do not create ZEXTLOAD's unless we are before legalize or the operation is
...
legal.
llvm-svn: 27402
2006-04-04 17:39:18 +00:00
Chris Lattner
e1e3adf802
Add a missing check, this fixes UnitTests/Vector/sumarray.c
...
llvm-svn: 27375
2006-04-03 17:29:28 +00:00
Chris Lattner
04c00fc844
Add a missing check, which broke a bunch of vector tests.
...
llvm-svn: 27374
2006-04-03 17:21:50 +00:00
Andrew Lenharth
94f012f606
back this out
...
llvm-svn: 27367
2006-04-03 03:16:50 +00:00
Andrew Lenharth
015eaf5f33
This should be a win of every arch
...
llvm-svn: 27364
2006-04-02 21:42:45 +00:00
Chris Lattner
4993249a04
Add a little dag combine to compile this:
...
int %AreSecondAndThirdElementsBothNegative(<4 x float>* %in) {
entry:
%tmp1 = load <4 x float>* %in ; <<4 x float>> [#uses=1]
%tmp = tail call int %llvm.ppc.altivec.vcmpgefp.p( int 1, <4 x float> < float 0x7FF8000000000000, float 0.000000e+00, float 0.000000e+00, float 0x7FF8000000000000 >, <4 x float> %tmp1 ) ; <int> [#uses=1]
%tmp = seteq int %tmp, 0 ; <bool> [#uses=1]
%tmp3 = cast bool %tmp to int ; <int> [#uses=1]
ret int %tmp3
}
into this:
_AreSecondAndThirdElementsBothNegative:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI1_0)
lis r5, ha16(LCPI1_0)
lvx v0, 0, r3
lvx v1, r5, r4
vcmpgefp. v0, v1, v0
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
mtspr 256, r2
blr
instead of this:
_AreSecondAndThirdElementsBothNegative:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI1_0)
lis r5, ha16(LCPI1_0)
lvx v0, 0, r3
lvx v1, r5, r4
vcmpgefp. v0, v1, v0
mfcr r3, 2
rlwinm r3, r3, 27, 31, 31
xori r3, r3, 1
cntlzw r3, r3
srwi r3, r3, 5
mtspr 256, r2
blr
llvm-svn: 27356
2006-04-02 06:11:11 +00:00
Chris Lattner
0442a18758
Constant fold all of the vector binops. This allows us to compile this:
...
"vector unsigned char mergeLowHigh = (vector unsigned char)
( 8, 9, 10, 11, 16, 17, 18, 19, 12, 13, 14, 15, 20, 21, 22, 23 );
vector unsigned char mergeHighLow = vec_xor( mergeLowHigh, vec_splat_u8(8));"
aka:
void %test2(<16 x sbyte>* %P) {
store <16 x sbyte> cast (<4 x int> xor (<4 x int> cast (<16 x ubyte> < ubyte 8, ubyte 9, ubyte 10, ubyte 11, ubyte 16, ubyte 17, ubyte 18, ubyte 19, ubyte 12, ubyte 13, ubyte 14, ubyte 15, ubyte 20, ubyte 21, ubyte 22, ubyte 23 > to <4 x int>), <4 x int> cast (<16 x sbyte> < sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8, sbyte 8 > to <4 x int>)) to <16 x sbyte>), <16 x sbyte> * %P
ret void
}
into this:
_test2:
mfspr r2, 256
oris r4, r2, 32768
mtspr 256, r4
li r4, lo16(LCPI2_0)
lis r5, ha16(LCPI2_0)
lvx v0, r5, r4
stvx v0, 0, r3
mtspr 256, r2
blr
instead of this:
_test2:
mfspr r2, 256
oris r4, r2, 49152
mtspr 256, r4
li r4, lo16(LCPI2_0)
lis r5, ha16(LCPI2_0)
vspltisb v0, 8
lvx v1, r5, r4
vxor v0, v1, v0
stvx v0, 0, r3
mtspr 256, r2
blr
... which occurs here:
http://developer.apple.com/hardware/ve/calcspeed.html
llvm-svn: 27343
2006-04-02 03:25:57 +00:00
Chris Lattner
e4e64b6b85
Implement constant folding of bit_convert of arbitrary constant vbuild_vector nodes.
...
llvm-svn: 27341
2006-04-02 02:53:43 +00:00
Chris Lattner
39dcf1a9e2
Delete identity shuffles, implementing CodeGen/Generic/vector-identity-shuffle.ll
...
llvm-svn: 27317
2006-03-31 22:16:43 +00:00
Chris Lattner
7e30af3887
Remove dead *extloads. This allows us to codegen vector.ll:test_extract_elt
...
to:
test_extract_elt:
alloc r3 = ar.pfs,0,1,0,0
adds r8 = 12, r32
;;
ldfs f8 = [r8]
mov ar.pfs = r3
br.ret.sptk.many rp
instead of:
test_extract_elt:
alloc r3 = ar.pfs,0,1,0,0
adds r8 = 28, r32
adds r9 = 24, r32
adds r10 = 20, r32
adds r11 = 16, r32
;;
ldfs f6 = [r8]
;;
ldfs f6 = [r9]
adds r8 = 12, r32
adds r9 = 8, r32
adds r14 = 4, r32
;;
ldfs f6 = [r10]
;;
ldfs f6 = [r11]
ldfs f8 = [r8]
;;
ldfs f6 = [r9]
;;
ldfs f6 = [r14]
;;
ldfs f6 = [r32]
mov ar.pfs = r3
br.ret.sptk.many rp
llvm-svn: 27297
2006-03-31 18:10:41 +00:00
Chris Lattner
2d8551c85b
Delete dead loads in the dag. This allows us to compile
...
vector.ll:test_extract_elt2 into:
_test_extract_elt2:
lfd f1, 32(r3)
blr
instead of:
_test_extract_elt2:
lfd f0, 56(r3)
lfd f0, 48(r3)
lfd f0, 40(r3)
lfd f1, 32(r3)
lfd f0, 24(r3)
lfd f0, 16(r3)
lfd f0, 8(r3)
lfd f0, 0(r3)
blr
llvm-svn: 27296
2006-03-31 18:06:18 +00:00
Chris Lattner
20e619fba3
When building a VVECTOR_SHUFFLE node from extract_element operations, make
...
sure to build it as SHUFFLE(X, undef, mask), not SHUFFLE(X, X, mask).
The later is not canonical form, and prevents the PPC splat pattern from
matching. For a particular splat, we go from generating this:
li r10, lo16(LCPI1_0)
lis r11, ha16(LCPI1_0)
lvx v3, r11, r10
vperm v3, v2, v2, v3
to generating:
vspltw v3, v2, 3
llvm-svn: 27236
2006-03-28 22:19:47 +00:00
Chris Lattner
a46dfe80c8
Canonicalize VECTOR_SHUFFLE(X, X, Y) -> VECTOR_SHUFFLE(X,undef,Y')
...
llvm-svn: 27235
2006-03-28 22:11:53 +00:00
Chris Lattner
c9992548fc
Turn a series of extract_element's feeding a build_vector into a
...
vector_shuffle node. For this:
void test(__m128 *res, __m128 *A, __m128 *B) {
*res = _mm_unpacklo_ps(*A, *B);
}
we now produce this code:
_test:
movl 8(%esp), %eax
movaps (%eax), %xmm0
movl 12(%esp), %eax
unpcklps (%eax), %xmm0
movl 4(%esp), %eax
movaps %xmm0, (%eax)
ret
instead of this:
_test:
subl $76, %esp
movl 88(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, (%esp)
movaps %xmm0, 32(%esp)
movss 4(%esp), %xmm0
movss 32(%esp), %xmm1
unpcklps %xmm0, %xmm1
movl 84(%esp), %eax
movaps (%eax), %xmm0
movaps %xmm0, 16(%esp)
movaps %xmm0, 48(%esp)
movss 20(%esp), %xmm0
movss 48(%esp), %xmm2
unpcklps %xmm0, %xmm2
unpcklps %xmm1, %xmm2
movl 80(%esp), %eax
movaps %xmm2, (%eax)
addl $76, %esp
ret
GCC produces this (with -fomit-frame-pointer):
_test:
subl $12, %esp
movl 20(%esp), %eax
movaps (%eax), %xmm0
movl 24(%esp), %eax
unpcklps (%eax), %xmm0
movl 16(%esp), %eax
movaps %xmm0, (%eax)
addl $12, %esp
ret
llvm-svn: 27233
2006-03-28 20:28:38 +00:00
Chris Lattner
b7163598f9
Don't crash on X^X if X is a vector. Instead, produce a vector of zeros.
...
llvm-svn: 27229
2006-03-28 19:11:05 +00:00
Chris Lattner
dc1eab5886
Don't call SimplifyDemandedBits on vectors
...
llvm-svn: 27128
2006-03-25 22:19:00 +00:00
Chris Lattner
5336a59e4b
fold insertelement(buildvector) -> buildvector if the inserted element # is
...
a constant. This implements test_constant_insert in CodeGen/Generic/vector.ll
llvm-svn: 26851
2006-03-19 01:27:56 +00:00
Nate Begeman
bb01d4f272
Remove BRTWOWAY*
...
Make the PPC backend not dependent on BRTWOWAY_CC and make the branch
selector smarter about the code it generates, fixing a case in the
readme.
llvm-svn: 26814
2006-03-17 01:40:33 +00:00
Chris Lattner
68ac09d5cb
make sure dead token factor nodes are removed by the dag combiner.
...
llvm-svn: 26731
2006-03-13 18:37:30 +00:00
Chris Lattner
d8c2a48d58
Fold X+Y -> X|Y when safe. This implements:
...
Regression/CodeGen/PowerPC/and_add.ll
a case that occurs with dynamic allocas of constant size.
llvm-svn: 26727
2006-03-13 06:51:27 +00:00
Chris Lattner
8bb6cb7d7b
add a couple of missing folds
...
llvm-svn: 26724
2006-03-13 06:26:26 +00:00
Chris Lattner
bdaf4f38b5
Reinstate this now that the offending opposite xform has been removed.
...
llvm-svn: 26548
2006-03-05 19:53:55 +00:00
Evan Cheng
d428e22c07
Back out fold (shl (add x, c1), c2) -> (add (shl x, c2), c1<<c2) for now.
...
It's causing an infinite loop compiling ldecod on x86 / Darwin.
llvm-svn: 26544
2006-03-05 07:30:16 +00:00
Chris Lattner
3bc4050217
Add some simple copysign folds
...
llvm-svn: 26543
2006-03-05 05:30:57 +00:00