aboutsummaryrefslogtreecommitdiff
path: root/arch/arm/lib/memzero.S
blob: 4e11602bfe59d8a593848775633bfeba91a04968 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
/*
 *  linux/arch/arm/lib/memzero.S
 *
 *  Copyright (C) 1995-2000 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *  Optimization for modern ARM platforms
 *  Copyright 2013 Harm Hanemaaijer
 */
#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.syntax unified
	/* Align the entry point to a 32-byte boundary in ARM mode
	 * (presumably an I-cache line for the targeted cores -- TODO
	 * confirm), and to 4 bytes in Thumb mode. */
ARM(	.p2align 5	)
THUMB(	.p2align 2	)

/*
 * void __memzero(void *dest, size_t n)
 *
 * Zero-fill n bytes starting at dest.
 *   Entry:  r0 = dest, r1 = byte count.
 * The original dest is saved in ip and moved back into r0 on the
 * word-aligned exit paths (note: the short unaligned exit at 9:
 * returns without restoring r0 -- callers must not rely on r0).
 * r2/r3/r4/r5 hold the zero fill pattern; r4 and r5 are pushed and
 * popped around the paths that use them.
 */
ENTRY(__memzero)
	ands	r3, r0, #3		@ r3 = dest & 3; eq set if word aligned
	mov	ip, r0			@ ip = original dest (restored on exit)
	mov	r2, #0			@ r2 = zero fill value
	bne	8f			@ unaligned: byte-fill up to a word boundary

/*
 * we know that the pointer in r0 is aligned to a word boundary.
 */
1:	cmp	r1, #8
	blt	5f			@ < 8 bytes: word + byte tail only
	mov	r3, r2			@ second zero reg for 8-byte stmia pairs

	cmp	r1, #64
	push 	{r4}
	blt	4f			@ 8..63 bytes: use the stmia ladder below
#if MEMSET_WRITE_ALIGN_BYTES > 0
	/* r4 = misalignment of r0 w.r.t. the preferred write alignment. */
	ands	r4, r0, #(MEMSET_WRITE_ALIGN_BYTES - 1)
	/* Let r4 be the number of bytes needed to reach alignment.  */
	rsb	r4, r4, #MEMSET_WRITE_ALIGN_BYTES
	/*
	 * At this point r4 contains the number of bytes to align
	 * if eq is not set. The eq flag is set if there are no bytes
	 * to align.
	 */
#if MEMSET_WRITE_ALIGN_BYTES == 8
	subne	r1, r1, r4		@ account for the alignment word
	strne	r2, [r0], #4		@ one word reaches 8-byte alignment
#elif MEMSET_WRITE_ALIGN_BYTES == 32
	beq	2f			@ already 32-byte aligned
	/* Decompose r4 (4..28, a multiple of 4, since r0 is already
	 * word aligned here) into 4-, 8- and 16-byte steps. */
	tst     r4, #4
	sub	r1, r1, r4
	strne	r2, [r0], #4		@ 4-byte step
	tst     r4, #8
	stmiane r0!, {r2, r3}		@ 8-byte step
	cmp	r4, #16
	stmiage r0!, {r2, r3}		@ 16-byte step (two 8-byte stores)
        stmiage r0!, {r2, r3}
#endif
	cmp	r1, #64
	blt	4f			@ aligning left fewer than 64 bytes
#endif

	/* >= 64 bytes remain, write pointer aligned: set up r4/r5 so
	 * each stmia below stores 16 zero bytes at once. */
2:	mov	r4, r2
	push	{r5}
	mov	r5, r2

	/* Main loop: 64 bytes per iteration; the subs/cmp flag ops
	 * are interleaved between the stores. */
3:	stmia	r0!, {r2, r3, r4, r5}
	subs	r1, r1, #64		/* Thumb16 */
	stmia	r0!, {r2, r3, r4, r5}
	cmp	r1, #64
	stmia	r0!, {r2, r3, r4, r5}
	stmia	r0!, {r2, r3, r4, r5}
	bge	3b			@ repeat while >= 64 bytes remain

	pop	{r5}
	/* Early exit if there are 0 bytes left. */
THUMB(	cbz	r1, 7f	)
ARM(	teq	r1, #0	)
ARM(	beq	7f	)

	/* Handle 8-64 bytes. */
4:	bic	r4, r1, #7		@ r4 = r1 & ~7: bytes the ladder stores
	subs	r1, r1, r4		@ r1 = 0..7 tail bytes (flags updated)
	rsb	r4, r4, #64		@ r4 = 64 - r4: bytes NOT to store
	/* The stmia instruction is 32-bit for ARM, 16-bit for Thumb2. */
THUMB(	lsrs	r4, r4, #2	)
ARM(	lsrs	r4, r4, #1	)
	/* Computed jump into the ladder: skip one stmia for each 8
	 * bytes that must not be stored; the shift above converts the
	 * skipped byte count into the instruction-size offset for the
	 * respective mode. */
	add	pc, pc, r4
	nop
	stmia	r0!, {r2, r3}
	stmia	r0!, {r2, r3}
	stmia	r0!, {r2, r3}
	stmia	r0!, {r2, r3}
	stmia	r0!, {r2, r3}
	stmia	r0!, {r2, r3}
	stmia	r0!, {r2, r3}
	stmia	r0!, {r2, r3}
	pop	{r4}

	/* 0..7 bytes left, destination word aligned. */
5:	cmp	r1, #4
	strge	r2, [r0], #4		@ one word if >= 4 bytes remain
	/* Early exit for multiple of 4 size. */
	ands	r1, r1, #3
	moveq	r0, ip			@ restore original dest
	moveq	pc, lr

	/*
	 * At this point there are 1, 2 or 3 bytes,
	 * and the destination is word aligned.
	 */
6:	cmp	r1, #2
	strhge	r2, [r0], #2		@ halfword covers the 2- and 3-byte cases
	strbne	r2, [r0]		@ odd byte (r1 == 1 or 3)
	mov	r0, ip
	mov	pc, lr

	/* Main-loop exit with nothing left: restore r4 and return. */
7:	pop	{r4}
	mov	r0, ip
	mov	pc, lr

	/* Unaligned start (r3 = dest & 3 = 1, 2 or 3): store the
	 * 4 - r3 bytes needed to word-align r0, then rejoin at 1:. */
8:	cmp	r1, #4
	blt	9f			@ < 4 bytes total: pure byte fill
	cmp	r3, #2
	sub	r1, r1, #4
	strble	r2, [r0]		@ r3 <= 2: at least two fill bytes
	strble	r2, [r0, #1]
	addle	r0, r0, #2
	add	r1, r1, r3		@ net effect: r1 -= (4 - r3)
	strbne	r2, [r0], #1		@ r3 == 1 or 3: one more byte
	b	1b

	/* 0 to 3 bytes left. */
9:	cmp	r1, #2
	strbge	r2, [r0]		@ two bytes if r1 >= 2
	strbge	r2, [r0, #1]
	addge	r0, r0, #2
	tst	r1, #1
	strbne	r2, [r0]		@ odd trailing byte
	mov	pc, lr			@ NOTE(review): r0 not restored from ip here
ENDPROC(__memzero)