|  | /* memset.S: optimised assembly memset | 
|  | * | 
|  | * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved. | 
|  | * Written by David Howells (dhowells@redhat.com) | 
|  | * | 
|  | * This program is free software; you can redistribute it and/or | 
|  | * modify it under the terms of the GNU General Public License | 
|  | * as published by the Free Software Foundation; either version | 
|  | * 2 of the License, or (at your option) any later version. | 
|  | */ | 
|  |  | 
|  |  | 
|  | .text | 
|  | .p2align	4 | 
|  |  | 
|  | ############################################################################### | 
|  | # Fill count bytes starting at p with the low 8 bits of ch; GR8 is never written here, so it still holds p on return. | 
|  | # void *memset(void *p, char ch, size_t count) | 
|  | # - in: GR8 = p, GR9 = ch, GR10 = count; work: GR4 = address cursor, GR5 = bytes left, GR6/GR7 = step, GR12/GR13 = pattern | 
|  | # - NOTE: must not use any stack. exception detection performs function return | 
|  | #         to caller's fixup routine, aborting the remainder of the set | 
|  | #         GR4, GR7, GR8, and GR11 must be managed | 
|  | # - ICC3 carries the result of the latest executed subtract from GR5; each beqlr on it returns once nothing is left | 
|  | ############################################################################### | 
|  | .globl		memset,__memset_end | 
|  | .type		memset,@function | 
|  | memset: | 
|  | orcc.p		gr10,gr0,gr5,icc3		; GR5 = count, ICC3 = zero test of count (count == 0 returns at once) | 
|  | andi		gr9,#0xff,gr9 | 
|  | or.p		gr8,gr0,gr4			; GR4 = address | 
|  | beqlr		icc3,#0 | 
|  |  | 
|  | # if the address is odd, write one byte and step past it so the address becomes 2b-aligned | 
|  | setlos.p	#1,gr6 | 
|  | andicc		gr4,#1,gr0,icc0 | 
|  | ckne		icc0,cc7 | 
|  | cstb.p		gr9,@(gr4,gr0)		,cc7,#1 | 
|  | csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3 | 
|  | cadd.p		gr4,gr6,gr4		,cc7,#1 | 
|  | beqlr		icc3,#0 | 
|  |  | 
|  | # if address bit 1 is set and 2+ bytes remain, write a 2-byte unit (csth) to 4b-align the address | 
|  | andicc.p	gr4,#2,gr0,icc0 | 
|  | subicc		gr5,#2,gr0,icc1 | 
|  | setlos.p	#2,gr6 | 
|  | ckne		icc0,cc7 | 
|  | slli.p		gr9,#8,gr12			; need to double up the pattern: GR12 = ch repeated twice | 
|  | cknc		icc1,cc5 | 
|  | or.p		gr9,gr12,gr12 | 
|  | andcr		cc7,cc5,cc7 | 
|  |  | 
|  | csth.p		gr12,@(gr4,gr0)		,cc7,#1 | 
|  | csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3 | 
|  | cadd.p		gr4,gr6,gr4		,cc7,#1 | 
|  | beqlr		icc3,#0 | 
|  |  | 
|  | # if address bit 2 is set and 4+ bytes remain, write a 4-byte unit (cst) to 8b-align the address | 
|  | andicc.p	gr4,#4,gr0,icc0 | 
|  | subicc		gr5,#4,gr0,icc1 | 
|  | setlos.p	#4,gr6 | 
|  | ckne		icc0,cc7 | 
|  | slli.p		gr12,#16,gr13			; need to quadruple-up the pattern: GR12 = ch repeated four times | 
|  | cknc		icc1,cc5 | 
|  | or.p		gr13,gr12,gr12 | 
|  | andcr		cc7,cc5,cc7 | 
|  |  | 
|  | cst.p		gr12,@(gr4,gr0)		,cc7,#1 | 
|  | csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3 | 
|  | cadd.p		gr4,gr6,gr4		,cc7,#1 | 
|  | beqlr		icc3,#0 | 
|  |  | 
|  | or.p		gr12,gr12,gr13			; need to octuple-up the pattern: copy GR12 to GR13 (presumably the second half of each 8-byte store) | 
|  |  | 
|  | # the address is now 8b-aligned - loop around writing 64-byte chunks (8 x 8-byte stores) while 64+ bytes remain | 
|  | setlos		#8,gr7 | 
|  | subi.p		gr4,#8,gr4			; bias GR4 by -8: the update-form stores below address GR4+GR7 (the error handler also uses GR4+GR7) | 
|  | setlos		#64,gr6 | 
|  |  | 
|  | subicc		gr5,#64,gr0,icc0 | 
|  | 0:	cknc		icc0,cc7 | 
|  | cstdu		gr12,@(gr4,gr7)		,cc7,#1 | 
|  | cstdu		gr12,@(gr4,gr7)		,cc7,#1 | 
|  | cstdu		gr12,@(gr4,gr7)		,cc7,#1 | 
|  | cstdu		gr12,@(gr4,gr7)		,cc7,#1 | 
|  | cstdu		gr12,@(gr4,gr7)		,cc7,#1 | 
|  | cstdu.p		gr12,@(gr4,gr7)		,cc7,#1 | 
|  | csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3 | 
|  | cstdu.p		gr12,@(gr4,gr7)		,cc7,#1 | 
|  | subicc		gr5,#64,gr0,icc0 | 
|  | cstdu.p		gr12,@(gr4,gr7)		,cc7,#1 | 
|  | beqlr		icc3,#0 | 
|  | bnc		icc0,#2,0b | 
|  |  | 
|  | # now do 32-byte remnant: four 8-byte stores if 32+ bytes remain (also pre-computes the 16-byte test in ICC0) | 
|  | subicc.p	gr5,#32,gr0,icc0 | 
|  | setlos		#32,gr6 | 
|  | cknc		icc0,cc7 | 
|  | cstdu.p		gr12,@(gr4,gr7)		,cc7,#1 | 
|  | csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3 | 
|  | cstdu.p		gr12,@(gr4,gr7)		,cc7,#1 | 
|  | setlos		#16,gr6 | 
|  | cstdu.p		gr12,@(gr4,gr7)		,cc7,#1 | 
|  | subicc		gr5,#16,gr0,icc0 | 
|  | cstdu.p		gr12,@(gr4,gr7)		,cc7,#1 | 
|  | beqlr		icc3,#0 | 
|  |  | 
|  | # now do 16-byte remnant: two 8-byte stores if 16+ bytes remain | 
|  | cknc		icc0,cc7 | 
|  | cstdu.p		gr12,@(gr4,gr7)		,cc7,#1 | 
|  | csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3 | 
|  | cstdu.p		gr12,@(gr4,gr7)		,cc7,#1 | 
|  | beqlr		icc3,#0 | 
|  |  | 
|  | # now do 8-byte remnant: one 8-byte store if 8+ bytes remain, then switch the step in GR7 to 4 | 
|  | subicc		gr5,#8,gr0,icc1 | 
|  | cknc		icc1,cc7 | 
|  | cstdu.p		gr12,@(gr4,gr7)		,cc7,#1 | 
|  | csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3 | 
|  | setlos.p	#4,gr7 | 
|  | beqlr		icc3,#0 | 
|  |  | 
|  | # now do 4-byte remnant: GR4 += 4 first so that GR4+GR7 (step 4) is still the next byte to write | 
|  | subicc		gr5,#4,gr0,icc0 | 
|  | addi.p		gr4,#4,gr4 | 
|  | cknc		icc0,cc7 | 
|  | cstu.p		gr12,@(gr4,gr7)		,cc7,#1 | 
|  | csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3 | 
|  | subicc.p	gr5,#2,gr0,icc1 | 
|  | beqlr		icc3,#0 | 
|  |  | 
|  | # now do 2-byte remnant: step becomes 2, so GR4 += 2 keeps GR4+GR7 on the next byte to write | 
|  | setlos		#2,gr7 | 
|  | addi.p		gr4,#2,gr4 | 
|  | cknc		icc1,cc7 | 
|  | csthu.p		gr12,@(gr4,gr7)		,cc7,#1 | 
|  | csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3 | 
|  | subicc.p	gr5,#1,gr0,icc0 | 
|  | beqlr		icc3,#0 | 
|  |  | 
|  | # now do 1-byte remnant: step becomes 0 and GR4 += 2 makes GR4 itself the address of the last byte | 
|  | setlos		#0,gr7 | 
|  | addi.p		gr4,#2,gr4 | 
|  | cknc		icc0,cc7 | 
|  | cstb.p		gr12,@(gr4,gr0)		,cc7,#1 | 
|  | bralr | 
|  | __memset_end: | 
|  |  | 
|  | .size		memset, __memset_end-memset | 
|  |  | 
|  | ############################################################################### | 
|  | # __memset_user - zero count bytes at p by calling memset with a fill byte of 0 | 
|  | # clear memory in userspace | 
|  | # - return the number of bytes that could not be cleared (0 on complete success) | 
|  | # - in: GR8 = p, GR9 = count; LR is copied to GR11 on entry and both exits jump through GR11 | 
|  | # long __memset_user(void *p, size_t count) | 
|  | # - the exported *_error_lr / *_error_handler labels are presumably used by fault fixup code not visible here | 
|  | ############################################################################### | 
|  | .globl		__memset_user, __memset_user_error_lr, __memset_user_error_handler | 
|  | .type		__memset_user,@function | 
|  | __memset_user: | 
|  | movsg		lr,gr11 | 
|  |  | 
|  | # abuse memset to do the dirty work: count moves from GR9 to GR10 and GR9 becomes the fill byte 0 | 
|  | or.p		gr9,gr9,gr10 | 
|  | setlos		#0,gr9 | 
|  | call		memset | 
|  | __memset_user_error_lr: | 
|  | jmpl.p		@(gr11,gr0) | 
|  | setlos		#0,gr8 | 
|  |  | 
|  | # deal with any exception generated by memset: GR4+GR7 is taken as the first address not cleared | 
|  | # GR4  - memset's address tracking pointer | 
|  | # GR7  - memset's step value (index register for store insns) | 
|  | # GR8  - memset's original start address | 
|  | # GR10 - memset's original count | 
|  | __memset_user_error_handler: | 
|  | add.p		gr4,gr7,gr4 | 
|  | add		gr8,gr10,gr8 | 
|  | jmpl.p		@(gr11,gr0) | 
|  | sub		gr8,gr4,gr8		; GR8 = (start + count) - (GR4 + GR7): we return the amount left uncleared | 
|  |  | 
|  | .size		__memset_user, .-__memset_user |