/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Normally compiler builtins are used, but sometimes the compiler calls
 * out-of-line code. Based on asm-i386/string.h.
 *
 * This assembly file was rewritten from the memmove_64.c file.
 *	- Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
 */
#include <linux/export.h>
#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>

#undef memmove

.section .noinstr.text, "ax"

/*
 * Implement memmove(). This can handle overlap between src and dst.
 *
 * Input:
 * rdi: dest
 * rsi: src
 * rdx: count
 *
 * Output:
 * rax: dest
 */
SYM_TYPED_FUNC_START(__memmove)

	mov %rdi, %rax

	/* Decide forward/backward copy mode */
	cmp %rdi, %rsi
	jge .Lmemmove_begin_forward
	mov %rsi, %r8
	add %rdx, %r8
	cmp %rdi, %r8
	jg 2f

#define CHECK_LEN	cmp $0x20, %rdx; jb 1f
#define MEMMOVE_BYTES	movq %rdx, %rcx; rep movsb; RET
.Lmemmove_begin_forward:
	ALTERNATIVE_2 __stringify(CHECK_LEN), \
		      __stringify(CHECK_LEN; MEMMOVE_BYTES), X86_FEATURE_ERMS, \
		      __stringify(MEMMOVE_BYTES), X86_FEATURE_FSRM

	/*
	 * The movsq instruction has a high startup latency, so small
	 * copies are handled with general-purpose registers instead.
	 */
	cmp $680, %rdx
	jb 3f
	/*
	 * rep movsq is only a win when src and dest share the same
	 * low-byte alignment.
	 */
	cmpb %dil, %sil
	je 4f
3:
	sub $0x20, %rdx
	/*
	 * We gobble 32 bytes forward in each loop iteration.
	 */
5:
	sub $0x20, %rdx
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq 2*8(%rsi), %r9
	movq 3*8(%rsi), %r8
	leaq 4*8(%rsi), %rsi

	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, 2*8(%rdi)
	movq %r8, 3*8(%rdi)
	leaq 4*8(%rdi), %rdi
	jae 5b
	addq $0x20, %rdx
	jmp 1f
	/*
	 * Handle the forward copy with movsq.
	 */
	.p2align 4
4:
	movq %rdx, %rcx
	movq -8(%rsi, %rdx), %r11
	lea -8(%rdi, %rdx), %r10
	shrq $3, %rcx
	rep movsq
	movq %r11, (%r10)
	jmp 13f
.Lmemmove_end_forward:

	/*
	 * Handle the backward copy with movsq.
	 */
	.p2align 4
7:
	movq %rdx, %rcx
	movq (%rsi), %r11
	movq %rdi, %r10
	leaq -8(%rsi, %rdx), %rsi
	leaq -8(%rdi, %rdx), %rdi
	shrq $3, %rcx
	std
	rep movsq
	cld
	movq %r11, (%r10)
	jmp 13f

	/*
	 * Prepare for the backward copy.
	 */
	.p2align 4
2:
	cmp $0x20, %rdx
	jb 1f
	cmp $680, %rdx
	jb 6f
	cmp %dil, %sil
	je 7b
6:
	/*
	 * Point src and dest at the tail of the region.
	 */
	addq %rdx, %rsi
	addq %rdx, %rdi
	subq $0x20, %rdx
	/*
	 * We gobble 32 bytes backward in each loop iteration.
	 */
8:
	subq $0x20, %rdx
	movq -1*8(%rsi), %r11
	movq -2*8(%rsi), %r10
	movq -3*8(%rsi), %r9
	movq -4*8(%rsi), %r8
	leaq -4*8(%rsi), %rsi

	movq %r11, -1*8(%rdi)
	movq %r10, -2*8(%rdi)
	movq %r9, -3*8(%rdi)
	movq %r8, -4*8(%rdi)
	leaq -4*8(%rdi), %rdi
	jae 8b
	/*
	 * Point src and dest back at the head for the remaining bytes.
	 */
	addq $0x20, %rdx
	subq %rdx, %rsi
	subq %rdx, %rdi
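	/*
	 * The sub-32-byte cases below are shared by the forward and
	 * backward paths. Each case performs all of its loads before any
	 * of its stores, so it stays correct even when source and
	 * destination overlap.
	 */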
1:
	cmpq $16, %rdx
	jb 9f
	/*
	 * Move data from 16 bytes to 31 bytes.
	 */
	movq 0*8(%rsi), %r11
	movq 1*8(%rsi), %r10
	movq -2*8(%rsi, %rdx), %r9
	movq -1*8(%rsi, %rdx), %r8
	movq %r11, 0*8(%rdi)
	movq %r10, 1*8(%rdi)
	movq %r9, -2*8(%rdi, %rdx)
	movq %r8, -1*8(%rdi, %rdx)
	jmp 13f
	.p2align 4
9:
	cmpq $8, %rdx
	jb 10f
	/*
	 * Move data from 8 bytes to 15 bytes.
	 */
	movq 0*8(%rsi), %r11
	movq -1*8(%rsi, %rdx), %r10
	movq %r11, 0*8(%rdi)
	movq %r10, -1*8(%rdi, %rdx)
	jmp 13f
10:
	cmpq $4, %rdx
	jb 11f
	/*
	 * Move data from 4 bytes to 7 bytes.
	 */
	movl (%rsi), %r11d
	movl -4(%rsi, %rdx), %r10d
	movl %r11d, (%rdi)
	movl %r10d, -4(%rdi, %rdx)
	jmp 13f
11:
	cmp $2, %rdx
	jb 12f
	/*
	 * Move data from 2 bytes to 3 bytes.
	 */
	movw (%rsi), %r11w
	movw -2(%rsi, %rdx), %r10w
	movw %r11w, (%rdi)
	movw %r10w, -2(%rdi, %rdx)
	jmp 13f
12:
	cmp $1, %rdx
	jb 13f
	/*
	 * Move the single remaining byte.
	 */
	movb (%rsi), %r11b
	movb %r11b, (%rdi)
13:
	RET
SYM_FUNC_END(__memmove)
EXPORT_SYMBOL(__memmove)

SYM_FUNC_ALIAS_MEMFUNC(memmove, __memmove)
EXPORT_SYMBOL(memmove)
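
/*
 * For reference only: a rough C-level sketch of the forward/backward
 * dispatch implemented above. This is illustrative and not part of the
 * build; copy_forward() and copy_backward() are hypothetical stand-ins
 * for the forward and backward code paths in this file.
 *
 *	void *memmove_sketch(void *dest, const void *src, size_t count)
 *	{
 *		const char *s = src;
 *		char *d = dest;
 *
 *		if (s >= d || s + count <= d)
 *			copy_forward(d, s, count);	// no harmful overlap
 *		else
 *			copy_backward(d, s, count);	// copy from the tail
 *		return dest;
 *	}
 */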