/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2013 Regents of the University of California
 */

#include <linux/linkage.h>
#include <asm/asm.h>

/* void *memcpy(void *, const void *, size_t) */
ENTRY(__memcpy)
WEAK(memcpy)
        beq     a0, a1, .copy_end
        /* Save for return value */
        mv      t6, a0

        /*
         * Register allocation for code below:
         * a0 - start of uncopied dst
         * a1 - start of uncopied src
         * t0 - end of uncopied dst
         */
        add     t0, a0, a2

        /*
         * Use bytewise copy if too small.
         *
         * This threshold must be at least 2*SZREG to ensure at least one
         * wordwise copy is performed. It is chosen to be 16 because it will
         * save at least 7 iterations of bytewise copy, which pays off the
         * fixed overhead.
         */
        li      a3, 16
        bltu    a2, a3, .Lbyte_copy_tail

        /*
         * Bytewise copy first to align a0 to word boundary.
         */
        addi    a2, a0, SZREG-1
        andi    a2, a2, ~(SZREG-1)
        beq     a0, a2, 2f
1:
        lb      a5, 0(a1)
        addi    a1, a1, 1
        sb      a5, 0(a0)
        addi    a0, a0, 1
        bne     a0, a2, 1b
2:

        /*
         * Now a0 is word-aligned. If a1 is also word aligned, we could perform
         * aligned word-wise copy. Otherwise we need to perform misaligned
         * word-wise copy.
         */
        andi    a3, a1, SZREG-1
        bnez    a3, .Lmisaligned_word_copy

        /* Unrolled wordwise copy */
        addi    t0, t0, -(16*SZREG-1)
        bgeu    a0, t0, 2f
1:
        REG_L   a2, 0(a1)
        REG_L   a3, SZREG(a1)
        REG_L   a4, 2*SZREG(a1)
        REG_L   a5, 3*SZREG(a1)
        REG_L   a6, 4*SZREG(a1)
        REG_L   a7, 5*SZREG(a1)
        REG_L   t1, 6*SZREG(a1)
        REG_L   t2, 7*SZREG(a1)
        REG_L   t3, 8*SZREG(a1)
        REG_L   t4, 9*SZREG(a1)
        REG_L   t5, 10*SZREG(a1)
        REG_S   a2, 0(a0)
        REG_S   a3, SZREG(a0)
        REG_S   a4, 2*SZREG(a0)
        REG_S   a5, 3*SZREG(a0)
        REG_S   a6, 4*SZREG(a0)
        REG_S   a7, 5*SZREG(a0)
        REG_S   t1, 6*SZREG(a0)
        REG_S   t2, 7*SZREG(a0)
        REG_S   t3, 8*SZREG(a0)
        REG_S   t4, 9*SZREG(a0)
        REG_S   t5, 10*SZREG(a0)
        REG_L   a2, 11*SZREG(a1)
        REG_L   a3, 12*SZREG(a1)
        REG_L   a4, 13*SZREG(a1)
        REG_L   a5, 14*SZREG(a1)
        REG_L   a6, 15*SZREG(a1)
        addi    a1, a1, 16*SZREG
        REG_S   a2, 11*SZREG(a0)
        REG_S   a3, 12*SZREG(a0)
        REG_S   a4, 13*SZREG(a0)
        REG_S   a5, 14*SZREG(a0)
        REG_S   a6, 15*SZREG(a0)
        addi    a0, a0, 16*SZREG
        bltu    a0, t0, 1b
2:
        /* Post-loop increment by 16*SZREG-1 and pre-loop decrement by SZREG-1 */
        addi    t0, t0, 15*SZREG

        /* Wordwise copy */
        bgeu    a0, t0, 2f
1:
        REG_L   a5, 0(a1)
        addi    a1, a1, SZREG
        REG_S   a5, 0(a0)
        addi    a0, a0, SZREG
        bltu    a0, t0, 1b
2:
        addi    t0, t0, SZREG-1

.Lbyte_copy_tail:
        /*
         * Bytewise copy anything left.
         */
        beq     a0, t0, 2f
1:
        lb      a5, 0(a1)
        addi    a1, a1, 1
        sb      a5, 0(a0)
        addi    a0, a0, 1
        bne     a0, t0, 1b
2:

        mv      a0, t6
.copy_end:
        ret

#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define M_SLL sll
#define M_SRL srl
#else
#define M_SLL srl
#define M_SRL sll
#endif

.Lmisaligned_word_copy:
        /*
         * Misaligned word-wise copy.
         * For misaligned copy we still perform word-wise copy, but we need to
         * use the value fetched from the previous iteration and do some shifts.
         * This is safe because we wouldn't access more words than necessary.
         */

        /* Calculate shifts */
        slli    t3, a3, 3
        sub     t4, x0, t3 /* negate is okay as shift will only look at LSBs */

        /* Load the initial value and align a1 */
        andi    a1, a1, ~(SZREG-1)
        REG_L   a5, 0(a1)

        addi    t0, t0, -(SZREG-1)

        /* At least one iteration will be executed here, no check */
1:
        M_SRL   a4, a5, t3
        REG_L   a5, SZREG(a1)
        addi    a1, a1, SZREG
        M_SLL   a2, a5, t4
        or      a2, a2, a4
        REG_S   a2, 0(a0)
        addi    a0, a0, SZREG
        bltu    a0, t0, 1b

        /* Update pointers to correct value */
        addi    t0, t0, SZREG-1
        add     a1, a1, a3

        j       .Lbyte_copy_tail
END(__memcpy)
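The misaligned path is the subtle part of the routine: instead of issuing misaligned word loads, it reads only aligned words from the source and reassembles each destination word from two consecutive loads using shifts. A rough C sketch of that shift-and-or scheme follows; it is an illustration only, not part of the kernel file. The function name `misaligned_word_copy`, the fixed 64-bit word size (SZREG == 8), and the little-endian byte order are assumptions of the sketch.

```c
/*
 * Hypothetical C illustration of the shift-and-or scheme used by
 * .Lmisaligned_word_copy; not part of the kernel source. Assumes a
 * little-endian machine, 64-bit words, a word-aligned dst, a misaligned
 * src, and enough source bytes that reading whole words stays in bounds.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void misaligned_word_copy(void *dst, const void *src, size_t nwords)
{
        size_t off = (uintptr_t)src & (sizeof(uint64_t) - 1);  /* a3 in the asm */
        unsigned int shift = off * 8;                          /* t3 = a3 << 3 */
        /* Align src down to a word boundary, as the asm does with andi. */
        const uint64_t *s = (const uint64_t *)((uintptr_t)src - off);
        uint64_t *d = dst;
        uint64_t cur = *s++;                                   /* initial load (a5) */

        /*
         * Each destination word combines the high bytes of the previous
         * source word with the low bytes of the next one. shift is in
         * 1..63 because this path is only taken when src is misaligned,
         * so 64 - shift is a valid shift amount.
         */
        while (nwords--) {
                uint64_t next = *s++;
                *d++ = (cur >> shift) | (next << (64 - shift));
                cur = next;
        }
}

int main(void)
{
        /* Word-aligned buffers; the copy starts at the misaligned src + 3. */
        uint64_t src[4];
        uint64_t dst[2];

        memcpy(src, "0123456789abcdefghijklmnopqrstu", 32);
        misaligned_word_copy(dst, (const char *)src + 3, 2);
        printf("%.16s\n", (const char *)dst);  /* prints "3456789abcdefghi" */
        return 0;
}
```

Where the sketch computes `64 - shift`, the assembly instead negates the shift count (`sub t4, x0, t3`) and relies on the shift instructions looking only at the low bits of the register, as the inline comment in the source notes.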