/* SPDX-License-Identifier: GPL-2.0 */

#include <linux/linkage.h>
#include <asm/asm.h>

ENTRY(__memmove)
WEAK(memmove)
	/*
	 * Here we determine if forward copy is possible. Forward copy is
	 * preferred to backward copy as it is more cache friendly.
	 *
	 * If a0 >= a1, t0 gives their distance; if t0 >= a2 then we can
	 * copy forward.
	 * If a0 < a1, we can always copy forward. This will make t0 negative,
	 * so an *unsigned* comparison will always have t0 >= a2.
	 *
	 * For forward copy we just delegate the task to memcpy.
	 */
	sub	t0, a0, a1
	bltu	t0, a2, 1f
	tail	__memcpy
1:

	/*
	 * Register allocation for code below:
	 * a0 - end of uncopied dst
	 * a1 - end of uncopied src
	 * t0 - start of uncopied dst
	 */
	mv	t0, a0
	add	a0, a0, a2
	add	a1, a1, a2

	/*
	 * Use bytewise copy if too small.
	 *
	 * This threshold must be at least 2*SZREG to ensure at least one
	 * wordwise copy is performed. It is chosen to be 16 because it will
	 * save at least 7 iterations of bytewise copy, which pays off the
	 * fixed overhead.
	 */
	li	a3, 16
	bltu	a2, a3, .Lbyte_copy_tail

	/*
	 * Bytewise copy first to align a0 (the end of dst) to a word boundary.
	 */
	andi	a2, a0, ~(SZREG-1)
	beq	a0, a2, 2f
1:
	addi	a1, a1, -1
	lb	a5, 0(a1)
	addi	a0, a0, -1
	sb	a5, 0(a0)
	bne	a0, a2, 1b
2:

	/*
	 * Now a0 is word-aligned. If a1 is also word-aligned, we can perform
	 * an aligned word-wise copy. Otherwise we need to perform a misaligned
	 * word-wise copy.
	 */
	andi	a3, a1, SZREG-1
	bnez	a3, .Lmisaligned_word_copy

	/* Wordwise copy */
	addi	t0, t0, SZREG-1
	bleu	a0, t0, 2f
1:
	addi	a1, a1, -SZREG
	REG_L	a5, 0(a1)
	addi	a0, a0, -SZREG
	REG_S	a5, 0(a0)
	bgtu	a0, t0, 1b
2:
	addi	t0, t0, -(SZREG-1)

.Lbyte_copy_tail:
	/*
	 * Bytewise copy anything left.
	 */
	beq	a0, t0, 2f
1:
	addi	a1, a1, -1
	lb	a5, 0(a1)
	addi	a0, a0, -1
	sb	a5, 0(a0)
	bne	a0, t0, 1b
2:

	mv	a0, t0
	ret

#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define M_SLL sll
#define M_SRL srl
#else
#define M_SLL srl
#define M_SRL sll
#endif

.Lmisaligned_word_copy:
	/*
	 * Misaligned word-wise copy.
	 * For misaligned copy we still perform word-wise copy, but we need to
	 * use the value fetched from the previous iteration and do some shifts.
	 * This is safe because we never access more words than necessary.
	 */

	/* Calculate shifts */
	slli	t3, a3, 3
	sub	t4, x0, t3	/* negation is okay as the shift only looks at the LSBs */

	/* Load the initial value and align a1 */
	andi	a1, a1, ~(SZREG-1)
	REG_L	a5, 0(a1)

	addi	t0, t0, SZREG-1
	/* At least one iteration will be executed here, no check needed */
1:
	M_SLL	a4, a5, t4
	addi	a1, a1, -SZREG
	REG_L	a5, 0(a1)
	M_SRL	a2, a5, t3
	or	a2, a2, a4
	addi	a0, a0, -SZREG
	REG_S	a2, 0(a0)
	bgtu	a0, t0, 1b

	/* Update pointers to their correct values */
	addi	t0, t0, -(SZREG-1)
	add	a1, a1, a3

	j	.Lbyte_copy_tail
END(__memmove)
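
The two less obvious tricks in the routine above are the single unsigned comparison that decides whether an overlapping copy may go forward, and the shift-and-or combining of two adjacent aligned source words in the misaligned path. The C sketch below models both purely as an illustration, not as kernel code: it assumes a little-endian, 64-bit machine (so M_SLL/M_SRL map to sll/srl as in the #if above), and the names forward_copy_ok and combine_words are invented for this example.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <inttypes.h>

/* Forward copy is safe iff (dst - src), taken as unsigned, is >= len:
 * this covers both dst < src (the subtraction wraps to a huge value)
 * and dst >= src + len (no overlap in the dangerous direction). */
static int forward_copy_ok(const void *dst, const void *src, size_t len)
{
	return (uintptr_t)dst - (uintptr_t)src >= len;
}

/* Given the source misalignment in bytes (1..7) and two consecutive
 * aligned source words (lo at the lower address), build the word that
 * starts `misalign` bytes into lo. On little-endian this is
 * (lo >> shift) | (hi << (XLEN - shift)), matching M_SRL/M_SLL above. */
static uint64_t combine_words(uint64_t lo, uint64_t hi, unsigned misalign)
{
	unsigned shift = misalign * 8;	/* slli t3, a3, 3 */

	return (lo >> shift) | (hi << (64 - shift));
}

int main(void)
{
	char buf[32] = "0123456789abcdef";

	/* dst below src: forward copy is always safe. */
	printf("%d\n", forward_copy_ok(buf, buf + 3, 8));	/* prints 1 */
	/* dst overlaps the tail of src: must copy backward. */
	printf("%d\n", forward_copy_ok(buf + 3, buf, 8));	/* prints 0 */

	/* Two aligned source words, misaligned access by 3 bytes: the
	 * combined word takes the top 5 bytes of lo and the bottom
	 * 3 bytes of hi. */
	uint64_t lo = 0x1111111122222222ULL;
	uint64_t hi = 0x3333333344444444ULL;
	printf("%016" PRIx64 "\n", combine_words(lo, hi, 3));	/* 4444441111111122 */
	return 0;
}

In the assembly the same combination runs backward: a5 holds the previously loaded, higher-address word, t4 holds the negated shift amount, and the negation is harmless because the shift instructions only look at the low bits of the shift amount.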