#include "include.inc"

	.text

	.global wait1us
	.global waitus
	.global waitms

; ============================================================================
;                                  24 MHz 
; ============================================================================
; Quartz 24 MHz, 1 clock = 0.0416667 us, 1 us = 24 clocks

#if F_CPU >= 23000000

; ----------------------------------------------------------------------------
;                           Short delay 1 us (24 MHz)
; ----------------------------------------------------------------------------

; clocks = 3 + 8*2 + 1 + 4 = 24
wait1us:
	; Fixed delay: 24 clocks (1 us at 24 MHz) counted from the RCALL of
	; the caller up to and including RET. The inner labels are alternate
	; entry points that skip part of the padding for shorter delays.
	; Cycle counts in [] assume the nop2 macro (defined outside this
	; file) takes 2 clocks, RCALL takes 3 and RET takes 4.
	; rcall	wait1us		; [3]
	nop2			; [2]
wait1us1:			; rcall wait1us1 = 3+7*2+1+4 = 22 clocks
	nop2			; [2]
wait1us2:			; rcall wait1us2 = 3+6*2+1+4 = 20 clocks
	nop2			; [2]
	nop2			; [2]
	nop2			; [2]
wait1us4:			; rcall wait1us4 = 3+3*2+1+4 = 14 clocks
	nop2			; [2]
	nop2			; [2]
	nop2			; [2]
	nop			; [1] odd clock to reach the exact total
	ret			; [4]

; ----------------------------------------------------------------------------
;                    Short delay 2..65537 us (24 MHz)
; ----------------------------------------------------------------------------
; INPUT: R25:R24=delay in [us] (2..65535, 0=65536 us, 1=65537 us)
; Destroys: R25, R24

; base clocks (without loops) = 1+1+3+2+14+(-1)+4=24

	; ldi	r24,lo8(delay)	; [1]
	; ldi	r25,hi8(delay)	; [1]
	; rcall	waitus		; [3]

; waitus: busy-wait R25:R24 microseconds. The fixed overhead (two LDI and
; RCALL in the caller, SBIW, padding, RET) is exactly one 24-clock period,
; so the loop below runs delay-1 times and the total is delay*24 clocks.
; Delay 0 and 1 wrap around in the counter and give 65536 and 65537 us.
waitus:	sbiw	r24,1		; [2] compensate base clocks (overhead counts as 1 us)
	rcall	wait1us4	; [3+3*2+1+4=14] pad the overhead up to 24 clocks

; ----- loop [24] clocks = 1 us

waitus2:rcall	wait1us2	; [3+6*2+1+4=20]
	sbiw	r24,1		; [2] decrement R25:R24, Z set when it reaches 0
	brne	waitus2		; [1,2] loop; the final, not taken pass is 1 clock shorter

	ret			; [4]

; ----------------------------------------------------------------------------
;                    Long delay 1..65536 ms (24 MHz)
; ----------------------------------------------------------------------------
; INPUT: R25:R24=delay in [ms] (1..65535, 0=65536 ms)
; Destroys: R27, R26, R25, R24

; ----- delay R25:R24 -> R27:R26

; waitms: busy-wait R25:R24 milliseconds (0 = 65536 ms).
; The millisecond counter is moved to R27:R26 because R25:R24 is needed as
; the argument of waitus on every pass.
waitms:	movw	r26,r24		; R27:R26 <- required delay

; ----- loop of 1 ms (24000 clocks)
; per pass: 20 + 1 + 1 + 23974 + 2 + 2 = 24000
; (the padding must be wait1us2 = 20 clocks; wait1us1 = 22 clocks would
;  give 24002 clocks per millisecond)

waitms4:rcall	wait1us2	; [3+6*2+1+4=20] padding

	ldi	r24,lo8(999)	; [1] delay low
	ldi	r25,hi8(999)	; [1] delay high
	rcall	waitus		; [3+8*2+(delay-1)*24-1+4 = 23974]

	sbiw	r26,1		; [2] decrement R27:R26
	brne	waitms4		; [1,2] loop

	ret

; ============================================================================
;                                  22.1184 MHz 
; ============================================================================
; Quartz 22.1184 MHz, 1 clock = 0.0452112 us, 1 us = 22.1184 clocks

#elif F_CPU >= 21000000

; ----------------------------------------------------------------------------
;                           Short delay 1 us (22.1184 MHz)
; ----------------------------------------------------------------------------

; clocks = 3 + 2*2 + 5*2 + 1 + 4 = 22
wait1us:
	; Fixed delay: 22 clocks (about 1 us at 22.1184 MHz) counted from the
	; RCALL of the caller up to and including RET. Cycle counts in []
	; assume the nop2 macro (defined outside this file) takes 2 clocks,
	; RCALL takes 3 and RET takes 4.
	; rcall	wait1us		; [3]
	nop2			; [2]
	nop2			; [2]
wait1us2:			; rcall wait1us2 = 3+5*2+1+4 = 18 clocks
	nop2			; [2]
	nop2			; [2]
	nop2			; [2]
	nop2			; [2]
	nop2			; [2]
	nop			; [1] odd clock to reach the exact total
	ret			; [4]

; ----------------------------------------------------------------------------
;                    Short delay 2..65537 us (22.1184 MHz)
; ----------------------------------------------------------------------------
; INPUT: R25:R24=delay in [us] (2..65535, 0=65536 us, 1=65537 us)
; Destroys: R25, R24

; base clocks (without loops) = 1+1+3+7*2+(-1)+4=22

	; ldi	r24,lo8(delay)	; [1]
	; ldi	r25,hi8(delay)	; [1]
	; rcall	waitus		; [3]

; waitus: busy-wait R25:R24 microseconds in units of 22 clocks. The fixed
; overhead (two LDI and RCALL in the caller, SBIW, padding, RET) equals one
; 22-clock period, so the loop runs delay-1 times: total = delay*22 clocks.
; Delay 0 and 1 wrap around in the counter and give 65536 and 65537 periods.
waitus:	sbiw	r24,1		; [2] compensate base clocks (overhead counts as one period)
	nop2			; [2] short delay
	nop2			; [2] short delay
	nop2			; [2] short delay
	nop2			; [2] short delay
	nop2			; [2] short delay
	nop2			; [2] short delay

; ----- loop [22] clocks = 1 us

waitus2:rcall	wait1us2	; [3+5*2+1+4=18]
	sbiw	r24,1		; [2] decrement R25:R24, Z set when it reaches 0
	brne	waitus2		; [1,2] loop; the final, not taken pass is 1 clock shorter

	ret			; [4]

; ----------------------------------------------------------------------------
;                    Long delay 1..65536 ms (22.1184 MHz)
; ----------------------------------------------------------------------------
; INPUT: R25:R24=delay in [ms] (1..65535, 0=65536 ms)
; Destroys: R27, R26, R25, R24

; ----- delay R25:R24 -> R27:R26

; waitms: busy-wait R25:R24 milliseconds (0 = 65536 ms).
; The millisecond counter lives in R27:R26 because R25:R24 is reloaded as
; the argument of waitus on every pass.
waitms:	movw	r26,r24		; R27:R26 <- required delay

; ----- loop of 1 ms (22118 clocks)
; per pass: 2 + 2 + 1 + 1 + 22108 + 2 + 2 = 22118

waitms4:nop2			; [2] short delay
	nop2			; [2] short delay

	ldi	r24,lo8(1005)	; [1] delay low (1005 periods of 22 clocks)
	ldi	r25,hi8(1005)	; [1] delay high
	rcall	waitus		; [3+7*2+(delay-1)*22-1+4 = 22108]

	sbiw	r26,1		; [2] decrement R27:R26
	brne	waitms4		; [1,2] loop

	ret

; ============================================================================
;                                  20 MHz 
; ============================================================================
; Quartz 20 MHz, 1 clock = 0.05 us, 1 us = 20 clocks

#elif F_CPU >= 18000000

; ----------------------------------------------------------------------------
;                           Short delay 1 us (20 MHz)
; ----------------------------------------------------------------------------

; clocks = 3 + 2*2 + 4*2 + 1 + 4 = 20
wait1us:
	; Fixed delay: 20 clocks (1 us at 20 MHz) counted from the RCALL of
	; the caller up to and including RET. Cycle counts in [] assume the
	; nop2 macro (defined outside this file) takes 2 clocks, RCALL takes
	; 3 and RET takes 4.
	; rcall	wait1us		; [3]
	nop2			; [2]
	nop2			; [2]
wait1us2:			; rcall wait1us2 = 3+4*2+1+4 = 16 clocks
	nop2			; [2]
	nop2			; [2]
	nop2			; [2]
	nop2			; [2]
	nop			; [1] odd clock to reach the exact total
	ret			; [4]

; ----------------------------------------------------------------------------
;                    Short delay 2..65537 us (20 MHz)
; ----------------------------------------------------------------------------
; INPUT: R25:R24=delay in [us] (2..65535, 0=65536 us, 1=65537 us)
; Destroys: R25, R24

; base clocks (without loops) = 1+1+3+6*2+(-1)+4=20

	; ldi	r24,lo8(delay)	; [1]
	; ldi	r25,hi8(delay)	; [1]
	; rcall	waitus		; [3]

; waitus: busy-wait R25:R24 microseconds. The fixed overhead (two LDI and
; RCALL in the caller, SBIW, padding, RET) is exactly one 20-clock period,
; so the loop runs delay-1 times and the total is delay*20 clocks.
; Delay 0 and 1 wrap around in the counter and give 65536 and 65537 us.
waitus:	sbiw	r24,1		; [2] compensate base clocks (overhead counts as 1 us)
	nop2			; [2] short delay
	nop2			; [2] short delay
	nop2			; [2] short delay
	nop2			; [2] short delay
	nop2			; [2] short delay

; ----- loop [20] clocks = 1 us

waitus2:rcall	wait1us2	; [3+4*2+1+4=16]
	sbiw	r24,1		; [2] decrement R25:R24, Z set when it reaches 0
	brne	waitus2		; [1,2] loop; the final, not taken pass is 1 clock shorter

	ret			; [4]

; ----------------------------------------------------------------------------
;                    Long delay 1..65536 ms (20 MHz)
; ----------------------------------------------------------------------------
; INPUT: R25:R24=delay in [ms] (1..65535, 0=65536 ms)
; Destroys: R27, R26, R25, R24

; ----- delay R25:R24 -> R27:R26

; waitms: busy-wait R25:R24 milliseconds (0 = 65536 ms).
; The millisecond counter lives in R27:R26 because R25:R24 is reloaded as
; the argument of waitus on every pass.
waitms:	movw	r26,r24		; R27:R26 <- required delay

; ----- loop of 1 ms (20000 clocks)
; per pass: 16 + 1 + 1 + 19978 + 2 + 2 = 20000

waitms4:rcall	wait1us2	; [3+4*2+1+4=16] padding

	ldi	r24,lo8(999)	; [1] delay low
	ldi	r25,hi8(999)	; [1] delay high
	rcall	waitus		; [3+6*2+(delay-1)*20-1+4 = 19978]

	sbiw	r26,1		; [2] decrement R27:R26
	brne	waitms4		; [1,2] loop

	ret

; ============================================================================
;                                  16 MHz 
; ============================================================================
; Quartz 16 MHz, 1 clock = 0.0625 us, 1 us = 16 clocks

#elif F_CPU >= 14000000

; ----------------------------------------------------------------------------
;                           Short delay 1 us (16 MHz)
; ----------------------------------------------------------------------------

; clocks = 3 + 2*2 + 2*2 + 1 + 4 = 16
wait1us:
	; Fixed delay: 16 clocks (1 us at 16 MHz) counted from the RCALL of
	; the caller up to and including RET. Cycle counts in [] assume the
	; nop2 macro (defined outside this file) takes 2 clocks, RCALL takes
	; 3 and RET takes 4.
	; rcall	wait1us		; [3]
	nop2			; [2]
	nop2			; [2]
wait1us2:			; rcall wait1us2 = 3+2*2+1+4 = 12 clocks
	nop2			; [2]
	nop2			; [2]
	nop			; [1] odd clock to reach the exact total
	ret			; [4]

; ----------------------------------------------------------------------------
;                    Short delay 2..65537 us (16 MHz)
; ----------------------------------------------------------------------------
; INPUT: R25:R24=delay in [us] (2..65535, 0=65536 us, 1=65537 us)
; Destroys: R25, R24

; base clocks (without loops) = 1+1+3+4*2+(-1)+4=16

	; ldi	r24,lo8(delay)	; [1]
	; ldi	r25,hi8(delay)	; [1]
	; rcall	waitus		; [3]

; waitus: busy-wait R25:R24 microseconds. The fixed overhead (two LDI and
; RCALL in the caller, SBIW, padding, RET) is exactly one 16-clock period,
; so the loop runs delay-1 times and the total is delay*16 clocks.
; Delay 0 and 1 wrap around in the counter and give 65536 and 65537 us.
waitus:	sbiw	r24,1		; [2] compensate base clocks (overhead counts as 1 us)
	nop2			; [2] short delay
	nop2			; [2] short delay
	nop2			; [2] short delay

; ----- loop [16] clocks = 1 us

waitus2:rcall	wait1us2	; [3+2+2+1+4=12]
	sbiw	r24,1		; [2] decrement R25:R24, Z set when it reaches 0
	brne	waitus2		; [1,2] loop; the final, not taken pass is 1 clock shorter

	ret			; [4]

; ----------------------------------------------------------------------------
;                    Long delay 1..65536 ms (16 MHz)
; ----------------------------------------------------------------------------
; INPUT: R25:R24=delay in [ms] (1..65535, 0=65536 ms)
; Destroys: R27, R26, R25, R24

; ----- delay R25:R24 -> R27:R26

; waitms: busy-wait R25:R24 milliseconds (0 = 65536 ms).
; The millisecond counter lives in R27:R26 because R25:R24 is reloaded as
; the argument of waitus on every pass.
waitms:	movw	r26,r24		; R27:R26 <- required delay

; ----- loop of 1 ms (16000 clocks)
; per pass: 12 + 1 + 1 + 15982 + 2 + 2 = 16000

waitms4:rcall	wait1us2	; [3+2+2+1+4=12] padding

	ldi	r24,lo8(999)	; [1] delay low
	ldi	r25,hi8(999)	; [1] delay high
	rcall	waitus		; [3+4*2+(delay-1)*16-1+4 = 15982]

	sbiw	r26,1		; [2] decrement R27:R26
	brne	waitms4		; [1,2] loop

	ret

; ============================================================================
;                                  12 MHz 
; ============================================================================
; Quartz 12 MHz, 1 clock = 0.0833 us, 1 us = 12 clocks

#elif F_CPU >= 10000000

; ----------------------------------------------------------------------------
;                           Short delay 1 us (12 MHz)
; ----------------------------------------------------------------------------

; clocks = 3 + 2*2 + 1 + 4 = 12
wait1us:
	; Fixed delay: 12 clocks (1 us at 12 MHz) counted from the RCALL of
	; the caller up to and including RET. Cycle counts in [] assume the
	; nop2 macro (defined outside this file) takes 2 clocks, RCALL takes
	; 3 and RET takes 4.
	; rcall	wait1us		; [3]
	nop2			; [2]
	nop2			; [2]
wait1us2:			; rcall wait1us2 = 3+1+4 = 8 clocks
	nop			; [1] odd clock to reach the exact total
	ret			; [4]

; ----------------------------------------------------------------------------
;                    Short delay 2..65537 us (12 MHz)
; ----------------------------------------------------------------------------
; INPUT: R25:R24=delay in [us] (2..65535, 0=65536 us, 1=65537 us)
; Destroys: R25, R24

; base clocks (without loops) = 1+1+3+2+2+(-1)+4=12

	; ldi	r24,lo8(delay)	; [1]
	; ldi	r25,hi8(delay)	; [1]
	; rcall	waitus		; [3]

; waitus: busy-wait R25:R24 microseconds. The fixed overhead (two LDI and
; RCALL in the caller, SBIW, padding, RET) is exactly one 12-clock period,
; so the loop runs delay-1 times and the total is delay*12 clocks.
; Delay 0 and 1 wrap around in the counter and give 65536 and 65537 us.
waitus:	sbiw	r24,1		; [2] compensate base clocks (overhead counts as 1 us)
	nop2			; [2] short delay

; ----- loop [12] clocks = 1 us

waitus2:rcall	wait1us2	; [3+1+4=8]
	sbiw	r24,1		; [2] decrement R25:R24, Z set when it reaches 0
	brne	waitus2		; [1,2] loop; the final, not taken pass is 1 clock shorter

	ret			; [4]

; ----------------------------------------------------------------------------
;                    Long delay 1..65536 ms (12 MHz)
; ----------------------------------------------------------------------------
; INPUT: R25:R24=delay in [ms] (1..65535, 0=65536 ms)
; Destroys: R27, R26, R25, R24

; ----- delay R25:R24 -> R27:R26

; waitms: busy-wait R25:R24 milliseconds (0 = 65536 ms).
; The millisecond counter lives in R27:R26 because R25:R24 is reloaded as
; the argument of waitus on every pass.
waitms:	movw	r26,r24		; R27:R26 <- required delay

; ----- loop of 1 ms (12000 clocks)
; per pass: 8 + 1 + 1 + 11986 + 2 + 2 = 12000

waitms4:rcall	wait1us2	; [3+1+4=8] padding

	ldi	r24,lo8(999)	; [1] delay low
	ldi	r25,hi8(999)	; [1] delay high
	rcall	waitus		; [3+2+2+(delay-1)*12-1+4 = 11986]

	sbiw	r26,1		; [2] decrement R27:R26
	brne	waitms4		; [1,2] loop

	ret

; ============================================================================
;                                  8 MHz 
; ============================================================================
; Quartz 8 MHz, 1 clock = 0.125 us, 1 us = 8 clocks

#elif F_CPU >= 7600000

; ----------------------------------------------------------------------------
;                           Short delay 1 us (8 MHz)
; ----------------------------------------------------------------------------

; clocks = 3 + 1 + 4 = 8
wait1us:
	; Fixed delay: 8 clocks (1 us at 8 MHz) counted from the RCALL of the
	; caller up to and including RET (assumes RCALL = 3, RET = 4 clocks).
	; rcall	wait1us		; [3]
	nop			; [1] pad 3+4 up to 8 clocks
	ret			; [4]

; ----------------------------------------------------------------------------
;                    Short delay 2..65537 us (8 MHz)
; ----------------------------------------------------------------------------
; INPUT: R25:R24=delay in [us] (2..65535, 0=65536 us, 1=65537 us)
; Destroys: R25, R24

; base clocks (without loops) = 1+1+3+2+(-1)+4=10

	; ldi	r24,lo8(delay)	; [1]
	; ldi	r25,hi8(delay)	; [1]
	; rcall	waitus		; [3]

; waitus: busy-wait R25:R24 microseconds with an 8-clock loop. The fixed
; overhead (two LDI and RCALL in the caller, SBIW, RET) is 10 clocks, 2 more
; than one loop period, so the total is delay*8+2 clocks.
; Delay 0 and 1 wrap around in the counter and give 65536 and 65537 passes.
waitus:	sbiw	r24,1		; [2] compensate base clocks (overhead counts as 1 us)

; ----- loop [8] clocks = 1 us

waitus2:nop2			; [2] short delay
	nop2			; [2] short delay
	sbiw	r24,1		; [2] decrement R25:R24, Z set when it reaches 0
	brne	waitus2		; [1,2] loop; the final, not taken pass is 1 clock shorter

	ret			; [4]

; ----------------------------------------------------------------------------
;                    Long delay 1..65536 ms (8 MHz)
; ----------------------------------------------------------------------------
; INPUT: R25:R24=delay in [ms] (1..65535, 0=65536 ms)
; Destroys: R27, R26, R25, R24

; ----- delay R25:R24 -> R27:R26

; waitms: busy-wait R25:R24 milliseconds (0 = 65536 ms).
; The millisecond counter lives in R27:R26 because R25:R24 is reloaded as
; the argument of waitus on every pass.
waitms:	movw	r26,r24		; R27:R26 <- required delay

; ----- loop of 1 ms (8000 clocks)
; per pass: 2 + 1 + 1 + 7992 + 2 + 2 = 8000

waitms4:nop2			; [2] short delay

	ldi	r24,lo8(999)	; [1] delay low
	ldi	r25,hi8(999)	; [1] delay high
	rcall	waitus		; [3+2+(delay-1)*8-1+4 = 7992]

	sbiw	r26,1		; [2] decrement R27:R26
	brne	waitms4		; [1,2] loop

	ret

; ============================================================================
;                                  7.3728 MHz 
; ============================================================================
; Quartz 7.3728 MHz, 1 clock = 0.136 us, 1 us = 7.3728 clocks, 7 clocks = 0.95us

#elif F_CPU >= 6000000

; ----------------------------------------------------------------------------
;                           Short delay 1 us (7.3728 MHz)
; ----------------------------------------------------------------------------

; clocks = 3 + 4 = 7
wait1us:
	; Shortest possible call: RCALL + RET = 7 clocks, about 0.95 us at
	; 7.3728 MHz (assumes RCALL = 3, RET = 4 clocks).
	; rcall	wait1us		; [3]
	ret			; [4]

; ----------------------------------------------------------------------------
;                    Short delay 5..65537 us (7.3728 MHz)
; ----------------------------------------------------------------------------
; INPUT: R25:R24=delay in [us] (5..65535)
; Destroys: R25, R24

; waitus: busy-wait about R25:R24 microseconds using a 7-clock loop.
; 7 clocks are shorter than 1 us at this clock, so the loop is first run
; delay/32 and then delay/64 extra passes (together 3/64, roughly 1/21 of
; the delay) as a pre-correction. Then the main run follows with delay-5
; passes, the 5 standing for the fixed overhead (min. 40 clocks = 5 us).
; The original delay and delay/32 are parked on the stack meanwhile.
waitus:	push	r24	; [2] push delay LOW
	push	r25	; [2] push delay HIGH

	lsr	r25	; [1] delay/2 HIGH
	ror	r24	; [1] delay/2 LOW
	lsr	r25	; [1] delay/4 HIGH
	ror	r24	; [1] delay/4 LOW
	lsr	r25	; [1] delay/8 HIGH
	ror	r24	; [1] delay/8 LOW
	lsr	r25	; [1] delay/16 HIGH
	ror	r24	; [1] delay/16 LOW
	lsr	r25	; [1] delay/32 HIGH
	ror	r24	; [1] delay/32 LOW

	push	r24	; [2] push delay/32 LOW (the loop below counts it down to 0)
	push	r25	; [2] push delay/32 HIGH

	sbiw	r24,0	; [2] result = 0 ? (16-bit test; ROR alone reflects only R24)
	breq	waitus2	; [1,2] result = 0, skip the loop (0 would wrap to 65536 passes)
	rcall	waitus6	; [3] delay/32 passes, returns through the RET at waitus8

waitus2:pop	r25	; [2] pop delay/32 HIGH
	pop	r24	; [2] pop delay/32 LOW

	lsr	r25	; [1] delay/64 HIGH
	ror	r24	; [1] delay/64 LOW

	sbiw	r24,0	; [2] result = 0 ?
	breq	waitus4	; [1,2] result = 0, skip the loop
	rcall	waitus6	; [3] delay/64 passes

waitus4:pop	r25	; [2] pop original delay HIGH
	pop	r24	; [2] pop original delay LOW

; ----- minimal time 5 us

	sbiw	r24,5	; [2] compensate base clocks
	brcs	waitus8	; [1,2] overflow: delay < 5, nothing left to wait
	breq	waitus8	; [1,2] zero: delay = 5, nothing left to wait

; ----- loop [7] clocks = 1 us
; (also called as a subroutine from above and from waitms)

waitus6:nop2			; [2] short delay
	nop			; [1] short delay
	sbiw	r24,1		; [2] decrement R25:R24
	brne	waitus6		; [1,2] loop

waitus8:ret			; [4]

; ----------------------------------------------------------------------------
;                    Long delay 1..65536 ms (7.3728 MHz)
; ----------------------------------------------------------------------------
; INPUT: R25:R24=delay in [ms] (1..65535, 0=65536 ms)
; Destroys: R27, R26, R25, R24

; ----- delay R25:R24 -> R27:R26

; waitms: busy-wait R25:R24 milliseconds (0 = 65536 ms).
; Calls the raw 7-clock loop at waitus6 directly, so the pre-correction
; code of waitus is bypassed and 1051 is a plain pass count.
waitms:	movw	r26,r24		; R27:R26 <- required delay

; ----- loop of 1 ms (7373 clocks)
; per pass: 2 + 2 + 1 + 1 + 7363 + 2 + 2 = 7373

waitms4:nop2			; [2] short delay
	nop2			; [2] short delay

	ldi	r24,lo8(1051)	; [1] pass count low
	ldi	r25,hi8(1051)	; [1] pass count high
	rcall	waitus6		; [3+delay*7-1+4 = 7363]

	sbiw	r26,1		; [2] decrement R27:R26
	brne	waitms4		; [1,2] loop

	ret

; ============================================================================
;                                  4 MHz 
; ============================================================================
; Quartz 4 MHz, 1 clock = 0.25 us, 1 us = 4 clocks

#elif F_CPU >= 3000000

; ----------------------------------------------------------------------------
;                           Short delay 1 us (4 MHz)
; ----------------------------------------------------------------------------

; clocks = 3 + 4 = 7
wait1us:
	; Shortest possible call: RCALL + RET = 7 clocks, which is 1.75 us at
	; 4 MHz, so the real delay is longer than 1 us
	; (assumes RCALL = 3, RET = 4 clocks).
	; rcall	wait1us		; [3]
	ret			; [4]

; ----------------------------------------------------------------------------
;                    Short delay 2..65537 us (4 MHz)
; ----------------------------------------------------------------------------
; INPUT: R25:R24=delay in [us] (2..65535, 0=65536 us, 1=65537 us)
; Destroys: R25, R24

; base clocks (without loops) = 1+1+3+2+1+(-1)+4=11

	; ldi	r24,lo8(delay)	; [1]
	; ldi	r25,hi8(delay)	; [1]
	; rcall	waitus		; [3]

; waitus: busy-wait R25:R24 microseconds with a 4-clock loop. The fixed
; overhead is 11 clocks, counted as 2 us, so the loop runs delay-2 times:
; total = delay*4+3 clocks. Delay 2 skips the loop; delay 0 and 1 wrap
; around in the counter and give 65534 and 65535 passes.
waitus:	sbiw	r24,2		; [2] compensate base clocks (overhead counts as 2 us)
	breq	waitus4		; [1,2] 2 us: nothing left, avoid wrapping to 65536 passes

; ----- loop [4] clocks = 1 us

waitus2:sbiw	r24,1		; [2] decrement R25:R24
	brne	waitus2		; [1,2] loop; the final, not taken pass is 1 clock shorter

waitus4:ret			; [4]

; ----------------------------------------------------------------------------
;                    Long delay 1..65536 ms (4 MHz)
; ----------------------------------------------------------------------------
; INPUT: R25:R24=delay in [ms] (1..65535, 0=65536 ms)
; Destroys: R27, R26, R25, R24

; ----- delay R25:R24 -> R27:R26

; waitms: busy-wait R25:R24 milliseconds (0 = 65536 ms).
; The millisecond counter lives in R27:R26 because R25:R24 is reloaded as
; the argument of waitus on every pass.
waitms:	movw	r26,r24		; R27:R26 <- required delay

; ----- loop of 1 ms (4000 clocks)
; per pass: 1 + 1 + 1 + 3993 + 2 + 2 = 4000

waitms4:nop			; [1] short delay

	ldi	r24,lo8(998)	; [1] delay low
	ldi	r25,hi8(998)	; [1] delay high
	rcall	waitus		; [3+2+1+(delay-2)*4-1+4 = 3993]

	sbiw	r26,1		; [2] decrement R27:R26
	brne	waitms4		; [1,2] loop

	ret

; ============================================================================
;                                  2 MHz 
; ============================================================================
; Quartz 2 MHz, 1 clock = 0.5 us, 1 us = 2 clocks

#elif F_CPU >= 1500000

; ----------------------------------------------------------------------------
;                           Short delay 1 us (2 MHz)
; ----------------------------------------------------------------------------

; clocks = 3 + 4 = 7
wait1us:
	; Shortest possible call: RCALL + RET = 7 clocks, which is 3.5 us at
	; 2 MHz, so the real delay is longer than 1 us
	; (assumes RCALL = 3, RET = 4 clocks).
	; rcall	wait1us		; [3]
	ret			; [4]

; ----------------------------------------------------------------------------
;                    Short delay 2..65535 us (2 MHz)
; ----------------------------------------------------------------------------
; INPUT: R25:R24=delay in [us] (2..65535, odd values round down; 0 or 1 = 131072 us)
; Destroys: R25, R24

; base clocks (without loops) = 1+1+3+1+1+(-1)+4=10

	; ldi	r24,lo8(delay)	; [1]
	; ldi	r25,hi8(delay)	; [1]
	; rcall	waitus		; [3]

; waitus: busy-wait in steps of 2 us: the delay is halved and each loop
; pass takes 4 clocks; the fixed overhead is 10 clocks on top of that.
; There is no zero check, so a delay of 0 or 1 (halved to 0) wraps around
; in the counter and runs 65536 passes.
waitus:	lsr	r25		; [1] rotate right
	ror	r24		; [1] rotate right (=delay/2)

; ----- loop [4] clocks = 2 us

waitus2:sbiw	r24,1		; [2] decrement R25:R24
	brne	waitus2		; [1,2] loop; the final, not taken pass is 1 clock shorter

	ret			; [4]

; ----------------------------------------------------------------------------
;                    Long delay 1..65536 ms (2 MHz)
; ----------------------------------------------------------------------------
; INPUT: R25:R24=delay in [ms] (1..65535, 0=65536 ms)
; Destroys: R27, R26, R25, R24

; ----- delay R25:R24 -> R27:R26

; waitms: busy-wait R25:R24 milliseconds (0 = 65536 ms).
; The millisecond counter lives in R27:R26 because R25:R24 is reloaded as
; the argument of waitus on every pass.
waitms:	movw	r26,r24		; R27:R26 <- required delay

; ----- loop of 1 ms (2000 clocks)
; per pass: 2 + 1 + 1 + 1992 + 2 + 2 = 2000

waitms4:nop2			; [2] short delay

	ldi	r24,lo8(992)	; [1] delay low (992/2 = 496 passes of 4 clocks)
	ldi	r25,hi8(992)	; [1] delay high
	rcall	waitus		; [3+1+1+(delay/2)*4-1+4 = 1992]

	sbiw	r26,1		; [2] decrement R27:R26
	brne	waitms4		; [1,2] loop

	ret

; ============================================================================
;                                  1 MHz 
; ============================================================================
; Quartz 1 MHz, 1 clock = 1 us, 1 us = 1 clock

#elif F_CPU >= 300000

; ----------------------------------------------------------------------------
;                           Short delay 1 us (1 MHz)
; ----------------------------------------------------------------------------

; clocks = 3 + 4 = 7
wait1us:
	; Shortest possible call: RCALL + RET = 7 clocks, which is 7 us at
	; 1 MHz, so the real delay is longer than 1 us
	; (assumes RCALL = 3, RET = 4 clocks).
	; rcall	wait1us		; [3]
	ret			; [4]

; ----------------------------------------------------------------------------
;                    Short delay 2..65535 us (1 MHz)
; ----------------------------------------------------------------------------
; INPUT: R25:R24=delay in [us] (2..65535)
; Destroys: R25, R24

; base clocks (without loops) = 1+1+3+4*1+2+1+(-1)+4=15

	; ldi	r24,lo8(delay)	; [1]
	; ldi	r25,hi8(delay)	; [1]
	; rcall	waitus		; [3]

; waitus: busy-wait in steps of 4 us: the delay is divided by 4 and each
; loop pass takes 4 clocks. A quotient of 0 (delay < 4) skips the loop,
; which would otherwise wrap around to 65536 passes.
waitus:	lsr	r25		; [1] rotate right
	ror	r24		; [1] rotate right (=delay/2)
	lsr	r25		; [1] rotate right
	ror	r24		; [1] rotate right (=delay/4)
	sbiw	r24,0		; [2] test whole R25:R24; Z after ROR reflects R24 only,
				;     so e.g. delay 1024 (R25:R24 = 0x0100) looked like zero
	breq	waitus4		; [1,2] zero

; ----- loop [4] clocks = 4 us

waitus2:sbiw	r24,1		; [2] decrement R25:R24
	brne	waitus2		; [1,2] loop

waitus4:ret			; [4]

; ----------------------------------------------------------------------------
;                    Long delay 1..65536 ms (1 MHz)
; ----------------------------------------------------------------------------
; INPUT: R25:R24=delay in [ms] (1..65535, 0=65536 ms)
; Destroys: R27, R26, R25, R24

; ----- delay R25:R24 -> R27:R26

waitms:	movw	r26,r24		; R27:R26 <- required delay

; ----- loop of 1 ms (1000 clocks)
; per pass: 1 + 1 + 1 + 993 + 2 + 2 = 1000
; (padding is a single NOP because waitus spends 2 clocks on its zero test)

waitms4:nop			; [1] short delay

	ldi	r24,lo8(980)	; [1] delay low (980/4 = 245 passes of 4 clocks)
	ldi	r25,hi8(980)	; [1] delay high
	rcall	waitus		; [3+4*1+2+1+(delay/4)*4-1+4 = 993]

	sbiw	r26,1		; [2] decrement R27:R26
	brne	waitms4		; [1,2] loop

	ret

; ============================================================================
;                                  32768 Hz
; ============================================================================
; Quartz 32768 Hz, 1 clock = 30.5 us

#else

; ----------------------------------------------------------------------------
;                           Short delay 1 us (32768 Hz)
; ----------------------------------------------------------------------------

; clocks = 3 + 4 = 7
wait1us:
	; Shortest possible call: RCALL + RET = 7 clocks. At 32768 Hz one
	; clock is about 30.5 us, so this takes roughly 214 us, not 1 us
	; (assumes RCALL = 3, RET = 4 clocks).
	; rcall	wait1us		; [3]
	ret			; [4]

; ----------------------------------------------------------------------------
;                    Short delay 2..65535 us (32768 Hz)
; ----------------------------------------------------------------------------
; INPUT: R25:R24=delay in [us] (2..65535)
; Destroys: R25, R24

; base clocks (without loops) = 1+1+3+14*1+2+1+(-1)+4=25

	; ldi	r24,lo8(delay)	; [1]
	; ldi	r25,hi8(delay)	; [1]
	; rcall	waitus		; [3]

; waitus: busy-wait in steps of about 122 us: the delay is divided by 128
; and each loop pass takes 4 clocks. A quotient of 0 (delay < 128) skips
; the loop, which would otherwise wrap around to 65536 passes.
waitus:	lsr	r25		; [1] rotate right
	ror	r24		; [1] rotate right (=delay/2)
	lsr	r25		; [1] rotate right
	ror	r24		; [1] rotate right (=delay/4)
	lsr	r25		; [1] rotate right
	ror	r24		; [1] rotate right (=delay/8)
	lsr	r25		; [1] rotate right
	ror	r24		; [1] rotate right (=delay/16)
	lsr	r25		; [1] rotate right
	ror	r24		; [1] rotate right (=delay/32)
	lsr	r25		; [1] rotate right
	ror	r24		; [1] rotate right (=delay/64)
	lsr	r25		; [1] rotate right
	ror	r24		; [1] rotate right (=delay/128)
	sbiw	r24,0		; [2] test whole R25:R24; Z after ROR reflects R24 only,
				;     so e.g. delay 32768 (R25:R24 = 0x0100) looked like zero
	breq	waitus4		; [1,2] zero

; ----- loop [4] clocks = 122 us
; (waitus2 is also called directly by waitms with a ready pass count)

waitus2:sbiw	r24,1		; [2] decrement R25:R24
	brne	waitus2		; [1,2] loop

waitus4:ret			; [4]

; ----------------------------------------------------------------------------
;                    Long delay 1..65536 ms (32768 Hz)
; ----------------------------------------------------------------------------
; INPUT: R25:R24=delay in [ms] (1..65535, 0=65536 ms)
; Destroys: R27, R26, R25, R24

; ----- delay R25:R24 -> R27:R26

; waitms: busy-wait R25:R24 milliseconds (0 = 65536 ms).
; Calls the raw 4-clock loop at waitus2 directly, so the divide-by-128
; code of waitus is bypassed and 5 is a plain pass count.
waitms:	movw	r26,r24		; R27:R26 <- required delay

; ----- loop of 1 ms (33 clocks)
; per pass: 1 + 1 + 1 + 26 + 2 + 2 = 33 (1 ms is 32.768 clocks)

waitms4:nop			; [1] short delay

	ldi	r24,lo8(5)	; [1] pass count low
	ldi	r25,hi8(5)	; [1] pass count high
	rcall	waitus2		; [3+delay*4-1+4 = 26]

	sbiw	r26,1		; [2] decrement R27:R26
	brne	waitms4		; [1,2] loop

	ret

#endif
