// picoChip ASM file
//
//   Support for 16-bit division/modulus: the common 15-bit unsigned
//   core (__divmod15) shared by the signed and unsigned wrappers.
//
//   Copyright (C) 2003-2013 Free Software Foundation, Inc.
//   Contributed by Picochip Ltd.
//   Maintained by Daniel Towner (daniel.towner@picochip.com)
//
//   This file is free software; you can redistribute it and/or modify it
//   under the terms of the GNU General Public License as published by the
//   Free Software Foundation; either version 3, or (at your option) any
//   later version.
//
//   This file is distributed in the hope that it will be useful, but
//   WITHOUT ANY WARRANTY; without even the implied warranty of
//   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
//   General Public License for more details.
//
//   Under Section 7 of GPL version 3, you are granted additional
//   permissions described in the GCC Runtime Library Exception, version
//   3.1, as published by the Free Software Foundation.
//
//   You should have received a copy of the GNU General Public License and
//   a copy of the GCC Runtime Library Exception along with this program;
//   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
//   <http://www.gnu.org/licenses/>.

.section .text

.global __divmod15
__divmod15:
_picoMark_FUNCTION_BEGIN=

// picoChip Function Prologue : &__divmod15 = 0 bytes

        // The picoChip instruction set has a divstep instruction which
        // is used to perform one iteration of a binary division algorithm.
        // The instruction allows 16-bit signed division to be implemented.
        // It does not directly allow 16-bit unsigned division to be
        // implemented. Thus, this function pulls out the common division
        // iteration for 15-bits unsigned, and then special wrappers
        // provide the logic to change this into a 16-bit signed or
        // unsigned division, as appropriate. This allows the two
        // versions of division to share a common implementation, reducing
        // code size when the two are used together. It also reduces
        // the maintenance overhead.

        // Input:
        //      r0 - dividend
        //      r1 - divisor
        // Output:
        //      r0 - quotient
        //      r1 - remainder
        // Scratch registers written by this routine:
        //      r2 - sign-bit count of the dividend, later the number of
        //           divsteps executed (shift count + 1)
        //      r3 - shift count needed to align the divisor
        //      r4 - sign-bit count of the divisor, then the loop counter,
        //           then 16 - r2 for the final quotient mask
        //      r15 - named as the destination of the initial compare
        // R5 is unused
        //
        // Notation as used in this file (confirm against the picoChip
        // assembler manual): the destination is the last operand, `\'
        // joins instructions that issue together, and `=->' marks the
        // instruction in the delay slot of the preceding branch.

        // Check for special cases. The emphasis is on detecting these as
        // quickly as possible, so that the main division can be started. If
        // the user requests division by one, division by self, and so on
        // then they will just have to accept that this won't be particularly
        // quick (relatively), whereas a real division (e.g., dividing a
        // large value by a small value) will run as fast as possible
        // (i.e., special case detection should not slow down the common case)
        //
        // Special cases to consider:
        //
        //      Division by zero.
        //      Division of zero.
        //      Inputs are equal
        //      Divisor is bigger than dividend
        //      Division by power of two (can be shifted instead).
        //      Division by 1 (special case of power of two division)
        //
        // Division/modulus by zero is undefined (ISO C:6.5.5), so
        // don't bother handling this special case.
        //
        // The special cases of division by a power of 2 are ignored, since
        // they cause the general case to slow down. Omitting these
        // special cases also reduces code size considerably.

        // Handle divisor >= dividend separately. Note that this also handles
        // the case where the dividend is zero.  Note that the flags must be
        // preserved, since they are also used at the branch destination.
        //
        // The compare computes divisor - dividend (r1 - r0).  The two sbc
        // instructions gather the counts used for the alignment below
        // (presumably a count of redundant sign bits - confirm against the
        // ISA manual); they sit between the compare and the copyeq at the
        // branch target, so they must leave the flags untouched.
        sub.0 r1,r0,r15
        sbc r0,r2 \ bge divisorGeDividend
=->     sbc r1,r4

        // Compute the shift count. The amount by which the divisor
        // must be shifted left to be aligned with the dividend.
        // r3 = r4 - r2; the zero flag from this subtract drives the beq below.
        sub.0 r4,r2,r3

        // Align the divisor to the dividend. Execute a divstep (since at
        // least one will always be executed). Skip the remaining loop
        // if the shift count is zero.
        // The delay slot also records r2 = r3 + 1, the total number of
        // divsteps, which is needed to split the result after the loop.
        lsl.0 r1,r3,r1 \ beq skipLoop
=->     divstep r0,r1 \ add.1 r3,1,r2

        // Execute the divstep loop until the loop counter (r4) is 0. This
        // assumes that the loop count is at least one.
        // r4 starts at r3 - 1 and is decremented in the delay slot, so the
        // bne always tests the flags of the previous decrement; the loop
        // body therefore runs r3 times.
        sub.0 r3,1,r4
divLoop:
        divstep r0,r1 \ bne divLoop
=->     sub.0 r4,1,r4

skipLoop:

        // The top bits of the result are the remainder. The bottom
        // bits are the quotient.
        // r1 = r0 >> r2 extracts the remainder.  Shifting r0 left and then
        // right by r4 = 16 - r2 clears everything above the low r2 bits,
        // leaving the quotient in r0.  The final shift runs in the delay
        // slot of the return.
        lsr.0 r0,r2,r1 \ sub.1 16,r2,r4
        jr (lr ) \ lsl.0 r0,r4,r0
=->     lsr.0 r0,r4,r0

// Special case.

divisorGeDividend:
        // The divisor is greater than or equal to the dividend. The flags
        // indicate which of these alternatives it is. The COPYEQ can be used
        // to set the result appropriately, without introducing any more
        // branches.
        //
        // Default (divisor > dividend): remainder = dividend, quotient = 0.
        // When the operands were equal, the two copyeq instructions replace
        // that with remainder = 0 (the just-zeroed r0) and quotient = 1.
        copy.0 r0,r1 \ copy.1 0,r0
        jr (lr) \ copyeq r0,r1
=->     copyeq 1,r0

_picoMark_FUNCTION_END=
// picoChip Function Epilogue : __divmod15


//============================================================================
// All DWARF information between this marker, and the END OF DWARF
// marker should be included in the source file. Search for
// FUNCTION_STACK_SIZE_GOES_HERE and FUNCTION NAME GOES HERE, and
// provide the relevant information. Add markers called
// _picoMark_FUNCTION_BEGIN and _picoMark_FUNCTION_END around the
// function in question.
//============================================================================

//============================================================================
// Frame information.
//============================================================================

.section .debug_frame
_picoMark_DebugFrame=

// Common CIE header.
// CIE length field: the number of bytes between the two CIE markers.
.unalignedInitLong _picoMark_CieEnd-_picoMark_CieBegin
_picoMark_CieBegin=
.unalignedInitLong 0xffffffff   // CIE id (distinguishes a CIE from an FDE)
.initByte 0x1   // CIE Version
.ascii 16#0#    // CIE Augmentation (empty string)
.uleb128 0x1    // CIE Code Alignment Factor
.sleb128 2      // CIE Data Alignment Factor
.initByte 0xc   // CIE RA Column
.initByte 0xc   // DW_CFA_def_cfa
.uleb128 0xd    //   register operand (presumably the stack pointer - confirm)
.uleb128 0x0    //   offset operand
.align 2
_picoMark_CieEnd=

// FDE
// Describes the single function delimited by _picoMark_FUNCTION_BEGIN and
// _picoMark_FUNCTION_END.  The CFA offset is 0 on entry and still 0 at the
// end of the function.
_picoMark_LSFDE0I900821033007563=
.unalignedInitLong _picoMark_FdeEnd-_picoMark_FdeBegin  // FDE length
_picoMark_FdeBegin=
.unalignedInitLong _picoMark_DebugFrame // FDE CIE offset
.unalignedInitWord _picoMark_FUNCTION_BEGIN     // FDE initial location
.unalignedInitWord _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN      // FDE address range
.initByte 0xe   // DW_CFA_def_cfa_offset
.uleb128 0x0    // <-- FUNCTION_STACK_SIZE_GOES_HERE
.initByte 0x4   // DW_CFA_advance_loc4
.unalignedInitLong _picoMark_FUNCTION_END-_picoMark_FUNCTION_BEGIN      // advance over the whole function
.initByte 0xe   // DW_CFA_def_cfa_offset
.uleb128 0x0    // CFA offset at function end
.align 2
_picoMark_FdeEnd=

//============================================================================
// Abbreviation information.
//============================================================================

.section .debug_abbrev
_picoMark_ABBREVIATIONS=

// Abbreviation 1: the compile unit.  The attribute order here fixes the
// order in which the values must appear in the .debug_info entry.
.section .debug_abbrev
        .uleb128 0x1    // (abbrev code)
        .uleb128 0x11   // (TAG: DW_TAG_compile_unit)
        .initByte 0x1   // DW_children_yes
        .uleb128 0x10   // (DW_AT_stmt_list)
        .uleb128 0x6    // (DW_FORM_data4)
        .uleb128 0x12   // (DW_AT_high_pc)
        .uleb128 0x1    // (DW_FORM_addr)
        .uleb128 0x11   // (DW_AT_low_pc)
        .uleb128 0x1    // (DW_FORM_addr)
        .uleb128 0x25   // (DW_AT_producer)
        .uleb128 0x8    // (DW_FORM_string)
        .uleb128 0x13   // (DW_AT_language)
        .uleb128 0x5    // (DW_FORM_data2)
        .uleb128 0x3    // (DW_AT_name)
        .uleb128 0x8    // (DW_FORM_string)
// A (0, 0) attribute/form pair ends the attribute list.
.initByte 0x0
.initByte 0x0

// Abbreviation 2: a subprogram with no children.
        .uleb128 0x2    ;# (abbrev code)
        .uleb128 0x2e   ;# (TAG: DW_TAG_subprogram)
.initByte 0x0   ;# DW_children_no
        .uleb128 0x3    ;# (DW_AT_name)
        .uleb128 0x8    ;# (DW_FORM_string)
        .uleb128 0x11   ;# (DW_AT_low_pc)
        .uleb128 0x1    ;# (DW_FORM_addr)
        .uleb128 0x12   ;# (DW_AT_high_pc)
        .uleb128 0x1    ;# (DW_FORM_addr)
// A (0, 0) attribute/form pair ends the attribute list.
.initByte 0x0
.initByte 0x0

// A zero abbreviation code ends the abbreviation table.
.initByte 0x0

//============================================================================
// Line information. DwarfLib requires this to be present, but it can
// be empty.
//============================================================================

.section .debug_line
_picoMark_LINES=

//============================================================================
// Debug Information
//============================================================================
.section .debug_info

//Fixed header.
// Unit length: the number of bytes between the two DEBUG_INFO markers.
.unalignedInitLong _picoMark_DEBUG_INFO_END-_picoMark_DEBUG_INFO_BEGIN
_picoMark_DEBUG_INFO_BEGIN=
.unalignedInitWord 0x2  // DWARF version
.unalignedInitLong _picoMark_ABBREVIATIONS      // offset of the abbreviation table
.initByte 0x2   // address size in bytes

// Compile unit information.
// Attribute values follow in the order declared by abbreviation 1 in
// .debug_abbrev: stmt_list, high_pc, low_pc, producer, language, name.
.uleb128 0x1    // (DIE 0xb) (DW_TAG_compile_unit)
.unalignedInitLong _picoMark_LINES      // DW_AT_stmt_list
.unalignedInitWord _picoMark_FUNCTION_END       // DW_AT_high_pc
.unalignedInitWord _picoMark_FUNCTION_BEGIN     // DW_AT_low_pc
// Producer is `picoChip'
.ascii 16#70# 16#69# 16#63# 16#6f# 16#43# 16#68# 16#69# 16#70# 16#00#
.unalignedInitWord 0xcafe // ASM language
.ascii 16#0# // Name. DwarfLib expects this to be present.

// Attribute values follow in the order declared by abbreviation 2:
// name, low_pc, high_pc.
.uleb128 0x2 ;# (DIE DW_TAG_subprogram)

// FUNCTION NAME GOES HERE. Use `echo name | od -t x1' to get the hex. Each hex
// digit is specified using the format 16#XX#
// NOTE(review): the recorded name has one leading underscore while the
// assembler symbol is __divmod15 - presumably this is the C-level name; confirm.
.ascii 16#5f# 16#64# 16#69# 16#76# 16#6d# 16#6f# 16#64# 16#31# 16#35# 16#0# // Function name `_divmod15'
.unalignedInitWord _picoMark_FUNCTION_BEGIN     // DW_AT_low_pc
.unalignedInitWord _picoMark_FUNCTION_END       // DW_AT_high_pc

.initByte 0x0 // end of compile unit children.

_picoMark_DEBUG_INFO_END=

//============================================================================
// END OF DWARF
//============================================================================

.section .endFile
// End of picoChip ASM file