aboutsummaryrefslogtreecommitdiff
path: root/arch/arm/lib/copy_page.S
blob: 119657d74ef38d9349224eb2c46cf80fc997318c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
/*
 *  linux/arch/arm/lib/copy_page.S
 *
 *  Copyright (C) 1995-1999 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 *  ASM optimised string functions
 *
 *  Optimization for modern ARM platforms
 *  Copyright 2013 Harm Hanemaaijer
 */
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
#include <asm/cache.h>

/*
 * Notes for armv6/v7:
 * These architectures do not like paired preloads in a 64-byte loop.
 * Instead use a 32-byte loop with one preload per loop on armv6
 * (L1_CACHE_BYTES == 32). On armv7 (L1_CACHE_BYTES == 64), use a
 * 64-byte loop with one preload per loop. In addition, make sure no
 * prefetching happens beyond the source region. The prefetch distance,
 * configured in cache.h, defaults to 3 (96 bytes on armv5/armv6, 192
 * bytes on armv7). This function translates to 16-bit Thumb2
 * instructions whenever possible.
 *
 * This version should work on older platforms as well and is unlikely
 * to degrade performance significantly.
 */

/*
 * Number of copy-loop iterations per page. Each iteration of the loop in
 * copy_page moves L1_CACHE_BYTES bytes (two load/store-multiple pairs),
 * so the loop must run PAGE_SZ / L1_CACHE_BYTES times.
 * PAGE_SZ and L1_CACHE_BYTES come from the included headers; PAGE_SZ is
 * presumably the page size in bytes -- confirm against asm-offsets.h.
 */
#define COPY_COUNT (PAGE_SZ / (L1_CACHE_BYTES))

		.text
	/*
	 * Align the entry point to 2^5 = 32 bytes or 2^2 = 4 bytes.
	 * ARM()/THUMB() are defined in the included headers; presumably only
	 * the one matching the instruction set being built emits its argument.
	 */
	ARM(	.p2align 5	)
	THUMB(	.p2align 2	)

/*
 * copy_page - copy COPY_COUNT * L1_CACHE_BYTES bytes from [r1] to [r0]
 *
 * In:    r0 = destination address
 *        r1 = source address
 * Out:   no result; r0 and r1 have been advanced past the copied bytes by
 *        the write-back ("!") addressing of the ldmia/stmia instructions.
 * Uses:  r2 (iteration counter), r3-r8, ip and lr (data), condition flags.
 *        r4-r8 and lr are pushed on entry and popped (lr into pc) on exit.
 *
 * PLD() and NO_PLD() come from the included headers; presumably exactly one
 * of them emits its argument, depending on whether preload instructions
 * are available -- confirm against asm/assembler.h.
 *
 * Loop structure when PLD() is active:
 *  - The entry preloads cover the first cache lines of the source.
 *  - r2 starts at COPY_COUNT - PREFETCH_DISTANCE. Each pass through label
 *    1 preloads the line PREFETCH_DISTANCE lines ahead and copies one
 *    line, repeating while the decremented r2 is still > 0.
 *  - After that, "cmn r2, #PREFETCH_DISTANCE" tests whether
 *    r2 + PREFETCH_DISTANCE > 0, so the last PREFETCH_DISTANCE lines are
 *    copied through label 2, which skips the preload. Provided
 *    PREFETCH_DISTANCE < COPY_COUNT, the furthest line preloaded is the
 *    last line of the source region, never one beyond it.
 * When NO_PLD() is active, r2 starts at COPY_COUNT and the loop simply
 * runs until r2 reaches zero.
 *
 * The loop body exists only for L1_CACHE_BYTES of 32 and 64. Any other
 * value is rejected at build time, because COPY_COUNT would no longer
 * match the number of bytes moved per iteration and the routine would
 * copy the wrong amount of memory.
 */
ENTRY(copy_page)
		stmfd	sp!, {r4-r8, lr}
	/* Prime the cache with the first lines of the source. */
	PLD(	pld	[r1, #0]		)
	PLD(	pld	[r1, #L1_CACHE_BYTES]		)
#if PREFETCH_DISTANCE > 2
	PLD(	pld	[r1, #2 * L1_CACHE_BYTES]	)
#if PREFETCH_DISTANCE > 3
	PLD(	pld	[r1, #3 * L1_CACHE_BYTES]	)
#if PREFETCH_DISTANCE > 4
	PLD(	pld	[r1, #4 * L1_CACHE_BYTES]	)
#endif
#endif
#endif
	/* r2 = number of iterations that still issue a preload (or all of them). */
	PLD(	movs	r2, #(COPY_COUNT - PREFETCH_DISTANCE)	)
	NO_PLD(	mov	r2, #COPY_COUNT				)
	/* Label 1: iteration with preload. Label 2: iteration without. */
1:	PLD(	pld	[r1, #PREFETCH_DISTANCE * L1_CACHE_BYTES])
2:
#if L1_CACHE_BYTES == 32
		/* 32 bytes per iteration: two 16-byte loads, two 16-byte stores. */
		ldmia	r1!, {r3-r6}
		ldmia   r1!, {r7, r8, ip, lr}
		stmia	r0!, {r3-r6}
		subs	r2, r2, #1
		stmia   r0!, {r7, r8, ip, lr}
#elif L1_CACHE_BYTES == 64
		/* 64 bytes per iteration: two 32-byte load/store pairs. */
		ldmia   r1!, {r3-r8, ip, lr}
		stmia	r0!, {r3-r8, ip, lr}
		ldmia   r1!, {r3-r8, ip, lr}
		subs	r2, r2, #1
		stmia	r0!, {r3-r8, ip, lr}
#else
#error "copy_page: unsupported L1_CACHE_BYTES (loop body exists only for 32 and 64)"
#endif
		/* Flags still come from the subs above; stmia leaves them alone. */
		bgt	1b
	/* Tail: PREFETCH_DISTANCE more lines, entered at 2 to skip the pld. */
	PLD(	cmn	r2, #PREFETCH_DISTANCE	)
	PLD(	bgt	2b			)
		ldmfd	sp!, {r4-r8, pc}
ENDPROC(copy_page)