/*
 * Optimized version of the ip_fast_csum() function
 * Used for calculating IP header checksum
 *
 * Return: 16bit checksum, complemented
 *
 * Inputs:
 *      in0: address of buffer to checksum (char *)
 *      in1: length of the buffer, counted in 32-bit words (int)
 *
 * Copyright (C) 2002 Intel Corp.
 * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
 */
|  |  | 
|  | #include <asm/asmmacro.h> | 
|  |  | 
/*
 * Since we know that most likely this function is called with buf aligned
 * on a 4-byte boundary and 20 bytes in length, we can execute rather quickly
 * versus calling the generic version of do_csum, which has lots of overhead
 * in handling various alignments and sizes.  However, due to the lack of
 * constraints put on the function input arguments, cases with alignment not
 * on 4-byte or size not equal to 20 bytes will be handled by the generic
 * do_csum function.
 */
|  |  | 
/*
 * Register aliases used by ip_fast_csum below: in0 and in1 are the two
 * incoming arguments, ret0 holds the value handed back to the caller.
 */
#define in0	r32
#define in1	r33
#define ret0	r8

/*
 * ip_fast_csum(in0 = buffer address, in1 = length in 32-bit words)
 *
 * Fast path:  in1 == 5 and in0 is 4-byte aligned.  The five words are
 *             loaded through two interleaved pointers (in0 and in0+4),
 *             summed in a 64-bit register and folded down to 16 bits.
 * Slow path:  any other length or alignment branches to .generic, which
 *             calls do_csum() with the length converted to bytes.
 *
 * Both exits return the one's complement of the sum in ret0, masked so
 * that only the low 16 bits can be set.
 */
GLOBAL_ENTRY(ip_fast_csum)
	.prologue
	.body
	cmp.ne	p6,p7=5,in1	// p6: size other than 20 byte, p7: exactly 5 words
	and	r14=3,in0	// low two address bits: non-zero if not 4-byte aligned
	add	r15=4,in0	// second source pointer
	;;
	cmp.ne.or.andcm p6,p7=r14,r0	// misaligned: force p6 on and p7 off
	;;
(p7)	ld4	r20=[in0],8	// word 0, in0 advances to word 2
(p7)	ld4	r21=[r15],8	// word 1, r15 advances to word 3
(p6)	br.spnt	.generic	// in0 is still unmodified when this is taken
	;;
	ld4	r22=[in0],8	// word 2, in0 advances to word 4
	ld4	r23=[r15],8	// word 3
	;;
	ld4	r24=[in0]	// word 4
	add	r20=r20,r21
	add	r22=r22,r23
	;;
	add	r20=r20,r22
	;;
	add	r20=r20,r24	// r20 = 64-bit sum of the five zero-extended words
	;;
	shr.u	ret0=r20,16	// now need to add the carry
	zxt2	r20=r20
	;;
	add	r20=ret0,r20
	;;
	shr.u	ret0=r20,16	// add carry again
	zxt2	r20=r20
	;;
	add	r20=ret0,r20
	;;
	shr.u	ret0=r20,16	// last fold: whatever is left fits in 16 bits
	zxt2	r20=r20
	;;
	add	r20=ret0,r20
	mov	r9=0xffff	// 16-bit mask; r9 is not used anywhere else here
	;;
	andcm	ret0=r9,r20	// ret0 = 0xffff & ~sum, upper 48 bits stay clear
	.restore sp		// reset frame state
	br.ret.sptk.many b0
	;;

.generic:
	.prologue
	.save ar.pfs, r35
	alloc	r35=ar.pfs,2,2,2,0	// 2 inputs, 2 locals (r34, r35), 2 outputs
	.save rp, r34
	mov	r34=b0			// keep the return address across the call
	.body
	dep.z	out1=in1,2,30		// out1 = (low 30 bits of in1) << 2: length in bytes
	mov	out0=in0
	;;
	br.call.sptk.many b0=do_csum
	;;
	mov	r9=0xffff		// built after the call so do_csum cannot clobber it
	;;
	// NOTE(review): assumes do_csum() returns a sum already folded to 16 bits;
	// the mask only clears bits 16..63 of the complemented value.
	andcm	ret0=r9,ret0		// ret0 = 0xffff & ~do_csum(...)
	mov	ar.pfs=r35
	mov	b0=r34
	br.ret.sptk.many b0
END(ip_fast_csum)