
// ****************************************************************************
//
//                            Assembler code
//
// ****************************************************************************
// RISC-V assembler:
// Function may destroy (storer=caller): x1 (ra), x5-x7 (t0-t2), x10-x15 (a0-a5)
// Function must save (storer=callee): x8 (s0/fp), x9 (s1)
// Special registers: x0 (zero), x2 (sp), x3 (gp), x4 (tp)

// TP (x4, Thread Pointer) register is not used in this system, can be used as special register in functions.

#define	PERIPH_BASE		0x40000000			// base address of the peripheral region

// Bus base addresses (all derived from PERIPH_BASE)
#define APB1PERIPH_BASE		PERIPH_BASE			// 0x40000000
#define APB2PERIPH_BASE		(PERIPH_BASE + 0x10000)		// 0x40010000
#define AHBPERIPH_BASE		(PERIPH_BASE + 0x20000)		// 0x40020000
// Timers (the TIM_*_OFF offsets are used with both TIM1_BASE and TIM2_BASE)
#define TIM1_BASE		(APB2PERIPH_BASE + 0x2C00)	// TIM1 0x40012C00
#define TIM2_BASE		(APB1PERIPH_BASE + 0x0000)	// TIM2 0x40000000
#define TIM_DMAINTENR_OFF	0x0C				// TIM: offset of DMA/interrupt enable register
#define TIM_INTFR_OFF		0x10				// TIM: offset of interrupt status register (handlers clear it by writing 0)
#define TIM_CCER_OFF		0x20				// TIM: offset of compare/capture enable register
#define TIM_CH1CVR_OFF		0x34				// TIM: offset of compare/capture register 1
#define TIM_CH2CVR_OFF		0x38				// TIM: offset of compare/capture register 2
#define TIM_CH3CVR_OFF		0x3C				// TIM: offset of compare/capture register 3
#define TIM_CH4CVR_OFF		0x40				// TIM: offset of compare/capture register 4
// SysTick
#define SYSTICK_BASE		0xE000F000			// SysTick system counter base
#define SYSTICK_CNT_OFF		0x08				// SysTick: offset of system counter register
// GPIO port D (TG_Handler samples pin PD2 through the input data register)
#define GPIOD_BASE		(APB2PERIPH_BASE + 0x1400)	// PD port 0x40011400
#define GPIO_INDR_OFF		0x08				// GPIO: offset of input data register
// USART2
#define USART2_BASE		(APB1PERIPH_BASE + 0x4400)	// USART2 0x40004400 (only CH32V005-CH32V007)
#define USART_STATR_OFF		0x00				// USART: offset of status register
#define USART_DATAR_OFF		0x04				// USART: offset of data register

// "sBiquad" biquad filter structure - byte offsets of the fields.
// FFT_AddSamples accesses B0..MAX as 32-bit words and steps through
// N_BANDS consecutive structures of BIQUAD_SIZE bytes starting at FFT_Bands.
#define BIQUAD_B0		0x00				// b0 coefficient (multiplies x[n] - x[n-2])
#define BIQUAD_A1		0x04				// a1 coefficient (multiplies y[n-1])
#define BIQUAD_A2		0x08				// a2 coefficient (multiplies y[n-2])
#define BIQUAD_X1		0x0C				// x1 = x[n-1], previous input
#define BIQUAD_X2		0x10				// x2 = x[n-2]
#define BIQUAD_Y1		0x14				// y1 = y[n-1], previous output
#define BIQUAD_Y2		0x18				// y2 = y[n-2]
#define BIQUAD_MAX		0x1C				// max (largest output y seen; raised by FFT_AddSamples)
#define BIQUAD_LEVEL		0x20				// level (not accessed by the assembler code in this file)
#define BIQUAD_PEAKLIVE		0x24				// peak live counter (occupies offsets 0x24..0x25)
#define BIQUAD_PEAK		0x26				// peak
#define BIQUAD_DISP		0x27				// display level
#define BIQUAD_SIZE		0x28				// size of sBiquad structure in bytes
#define N_BANDS			16				// number of bands (= number of sBiquad filters)

// "sCOMScreen" COM screen buffer structure - byte offsets of the fields.
// NOTE: COM_Handler masks the row index with COM_SCREENH-1 and computes the row
// offset in the screen data as (row << 4), i.e. it hard-codes COM_SCREENW = 16.
#define COM_SCREENH		8		// number of rows of the screen buffer (must be power of 2)
#define COM_SCREENW		16		// number of positions per screen row
#define COM_SCREEN_ROW		0x00		// (s32) current row
#define COM_SCREEN_LEN		0x04		// (u8[8]) length of each row (0xff = not initialized yet)
#define COM_SCREEN_BUF		0x0C		// (char[8*16]) screen data, row after row
#define COMScreen		Buf		// address of interrupt screen buffer (shares the common buffer Buf)

// NA page (NA_Handler: phase accumulator = buffer index << NA_STEPBITS | fraction)
#define NA_STEPBITS	10			// number of fraction bits below the integer buffer index
#define NA_PHASEMASK	((1<<NA_STEPBITS)-1)	// phase fraction mask (= 1023)
#define NA_BUFBITS	12			// buffer size in bits
#define NA_BUFNUM	(1<<NA_BUFBITS) 	// buffer number of entries (= 4096)
#define NA_BUFMASK	(NA_BUFNUM-1)		// buffer index mask (index wraps around the buffer)

// REP page (REP_Handler stores one 16-bit sample per edge)
#define REP_BUFSIZE	2048			// max. number of samples

// FT setup (FT_Handler2 disables its capture interrupt when the edge count reaches this value)
#define COUNT_OUT	32768	// number of pulses per measure window, when Timer 1 interrupt
			// can overload (max. 500kHz, but better 65kHz - SysTick can distort measurements)

// ============================================================================
//                     DUT - interrupt on signal edge
// ============================================================================
// HANDLER void NOFLASH(TIM1_CC_IRQHandler)()
// HANDLER void NOFLASH(DUT_Handler)()
//
// Timer 1 capture interrupt: measures the length of HIGH and LOW pulses.
//
// On every interrupt:
//  - reads the TIM1 channel 1 capture value and clears the TIM1 interrupt flags,
//  - extends the capture value to a full 32-bit time stamp using the SysTick
//    counter:  edge = time + (s16)(capture - time),
//  - interval = edge - DutLast, then DutLast = edge,
//  - toggles bit 1 of the TIM1 CCER register (edge polarity), so the next
//    interrupt comes on the opposite edge.
// If bit 1 of the OLD CCER value was set (falling edge, end of HIGH pulse):
//    DutH_Tmp = interval.
// Otherwise (rising edge, end of LOW pulse):
//    if (DutN >= 0) { DutL += interval; DutH += DutH_Tmp; }
//    DutN++;
//
// Uses A0-A5 and T0-T2 without saving them and returns with MRET.
// NOTE(review): presumably the interrupt entry preserves the caller-saved
// registers listed at the top of this file - confirm.

	.section .time_critical, "ax"

	.align	4,,
.global DUT_Handler
DUT_Handler:

	// prepare TIM1 base -> A5
	li	a5,TIM1_BASE		// A5 <- TIM1 base

	// read capture result -> A4
	lw	a4,TIM_CH1CVR_OFF(a5)	// A4 <- capture result

	// clear interrupt request
	sw	zero,TIM_INTFR_OFF(a5)	// clear interrupt status register (all flags)

	// get current time -> A3
	li	a2,SYSTICK_BASE		// A2 <- SysTick base
	lw	a3,SYSTICK_CNT_OFF(a2)	// A3 <- SysTick counter
	li	a2,1<<1			// A2 <- bit 1 (flag of falling edge)

// Here is:
//  A5 ... TIM1 base
//  A4 ... capture result
//  A3 ... current time
//  A2 ... bit 1 (flag of falling edge)

	// get edge time -> A4 (time += (s16)(capture - time))
	// only the low 16 bits of the difference are used, sign-extended
	sub	a4,a4,a3		// A4 <- capture - time
	slli	a4,a4,16		// A4 <- shift low 16-bits high
	srai	a4,a4,16		// A4 <- extend low 16-bits to signed
	add	a4,a4,a3		// A4 <- new precise time of the edge

	// get length of new interval -> A4, save new edge time -> DutLast
	lui	t0,%hi(DutLast)		// T0 <- address of DutLast, time of last edge
	lw	a1,%lo(DutLast)(t0)	// A1 <- old edge time
	sw	a4,%lo(DutLast)(t0)	// DutLast <- save new edge time
	sub	a4,a4,a1		// A4 <- length of new interval

	// flip edge
	lw	a3,TIM_CCER_OFF(a5)	// A3 <- get capture enable register
	lui	t1,%hi(DutH_Tmp)	// T1 <- address of DutH_Tmp, last high pulse
	xor	a1,a3,a2		// A1 <- change edge flag
	sw	a1,TIM_CCER_OFF(a5)	// set new flag

// Here is:
//  A4 ... length of new interval
//  A3 ... old capture enable register CCER
//  A2 ... bit 1 (flag of falling edge)
//  T1 ... address of DutH_Tmp, last high pulse

// falling edge - end of HIGH pulse (1st interrupt after init)
	and	a0,a3,a2		// check falling edge (tested on the OLD CCER value)
	beqz	a0,2f			// skip if rising edge (flag is 0)

	// save new HIGH interval
	sw	a4,%lo(DutH_Tmp)(t1)	// DutH_Tmp <- save new HIGH interval
	mret

// rising edge - end of LOW pulse (2nd interrupt after init)

// Here is:
//  A4 ... length of new interval
//  T1 ... address of DutH_Tmp, last high pulse

	// get pulse counter -> A3
2:	lui	t2,%hi(DutN)		// T2 <- address of DutN, number of samples
	lw	a3,%lo(DutN)(t2)	// A3 <- pulse counter DutN

	// check if pulse is valid (negative counter = pulse is counted but not accumulated)
	bltz	a3,4f			// skip if pulse is not valid

// Here is:
//  A4 ... length of new interval
//  A3 ... pulse counter DutN
//  T2 ... address of DutN, number of samples
//  T1 ... address of DutH_Tmp, last high pulse

	// add last LOW pulse to the accumulator
	lui	a1,%hi(DutL)		// A1 <- address of DutL, LOW pulse accumulator
	lw	a2,%lo(DutL)(a1)	// A2 <- LOW pulse accumulator
	lui	a0,%hi(DutH)		// A0 <- address of DutH, HIGH pulse accumulator
	add	a2,a2,a4		// A2 <- add last LOW pulse to the accumulator

	// add last HIGH pulse to the accumulator
	lw	a4,%lo(DutH)(a0)	// A4 <- HIGH pulse accumulator
	lw	t1,%lo(DutH_Tmp)(t1)	// T1 <- old HIGH interval from DutH_Tmp
	sw	a2,%lo(DutL)(a1)	// save new LOW accumulator
	add	a4,a4,t1		// A4 <- add last HIGH pulse to the accumulator
	sw	a4,%lo(DutH)(a0)	// save new HIGH accumulator

	// increase pulse counter
4:	addi	a3,a3,1			// increase counter
	sw	a3,%lo(DutN)(t2)	// save new counter
	mret

// ============================================================================
//                          FFT - Add samples to filters
// ============================================================================
// void NOFLASH(FFT_AddSamples)();
// takes 30 ms
//
// Runs every band filter over one block of samples.
//
// The sample block starts at Buf + FFT_BufInx*2048 and is 2048 bytes long.
// Each sample byte (0..255) is converted to x = (sample - 128) << (26-7).
// For each of the N_BANDS "sBiquad" filters at FFT_Bands, and each sample:
//
//     acc = b0*(x - x2) - a1*y1 - a2*y2       (64-bit accumulator)
//     y   = (acc + (1 << 25)) >> 26           (rounded, low 32 bits kept)
//     x2 = x1; x1 = x; y2 = y1; y1 = y;
//
// The first 1024 samples only update the filter state; during the second 1024
// samples the filter's 'max' is additionally raised to y whenever y > max
// (signed compare). State X1, X2, Y1, Y2 and 'max' are loaded from the
// structure before the block and stored back after it.
//
// No arguments, no return value. RA, S0, S1 and TP are saved on the stack and
// restored; A0-A5 and T0-T2 are destroyed.

	.section .time_critical, "ax"

	.align	4,,
.global FFT_AddSamples
FFT_AddSamples:

	// save registers (32-byte frame: 4 saved registers + 4 local words)
	add	sp,sp,-32
	sw	ra,0(sp)
	sw	s0,4(sp)
	sw	s1,8(sp)
	sw	tp,12(sp)

// Locals:
//  0(sp): RA
//  4(sp): S0
//  8(sp): S1
//  12(sp): TP
//  16(sp): buffer address 's0'
//  20(sp): end of 1st part of buffer 's0 + FFT_BUF_SIZE/2'
//  24(sp): end of 2nd part of buffer 's0 + FFT_BUF_SIZE'
//  28(sp): end of filters
//  A0: temporary low
//  A1: temporary high
//  A2: carry, temporary
//  A3: acc low, Y
//  A4: acc high
//  A5: X
//  RA: max
//  T0: pointer to samples 's'
//  T1: Y1
//  T2: Y2
//  S0: X1
//  S1: X2
//  TP: filter 'f' (offsets BIQUAD_B0, A1, A2, X1, X2, Y1, Y2, MAX)

	// prepare buffer address 's0'
	lui	a5,%hi(FFT_BufInx)
	lbu	a5,%lo(FFT_BufInx)(a5)		// A5 <- buffer index
	la	a4,Buf				// A4 <- buffer base
	slli	a5,a5,11			// A5 <- buffer index * FFT_BUF_SIZE (2048)
	add	a5,a5,a4			// A5 <- buffer address
	sw	a5,16(sp)			// save buffer address 's0'

	// prepare end of buffer
	addi	a5,a5,1024			// A5 <- end of 1st part of buffer
	sw	a5,20(sp)			// save end of 1st part of buffer
	addi	a5,a5,1024			// A5 <- end of 2nd part of buffer
	sw	a5,24(sp)			// save end of 2nd part of buffer

	// pointer to filter -> TP
	la	tp,FFT_Bands			// TP <- pointer to filters
	addi	a5,tp,BIQUAD_SIZE*N_BANDS	// A5 <- end of filters
	sw	a5,28(sp)			// save end of filters

	.align	2,,

// ==== loop over filters

	// prepare pointer to samples -> T0
2:	lw	t0,16(sp)			// T0 <- pointer to samples 's'

	// load registers of the filter
	lw	t1,BIQUAD_Y1(tp)		// T1 <- Y1
	lw	t2,BIQUAD_Y2(tp)		// T2 <- Y2
	lw	s0,BIQUAD_X1(tp)		// S0 <- X1
	lw	s1,BIQUAD_X2(tp)		// S1 <- X2

// ==== 1st loop, without 'max' update

	.align	2,,

	// load next sample and convert it to Q26 format -> x, A5
3:	lbu	a5,0(t0)			// A5 <- sample 0..255
	addi	t0,t0,1				// T0 <- shift to next sample
	addi	a5,a5,-128			// A5 <- sample - 128
	slli	a5,a5,26-7			// A5 <- x, shift to format Q26

	// get accumulator acc = b0*(x[n] - x[n-2]) -> A4:A3
	lw	a4,BIQUAD_B0(tp)		// A4 <- b0
	sub	a2,a5,s1			// A2 <- x - x2
	mul	a3,a4,a2			// A3 <- b0*(x - x2) low
	lw	a1,BIQUAD_A1(tp)		// A1 <- a1
	mulh	a4,a4,a2			// A4 <- b0*(x - x2) high (signed)

	// get a1*y[n-1] -> A1:A0
	mul	a0,a1,t1			// A0 <- a1*y1 low
	mulh	a1,a1,t1			// A1 <- a1*y1 high (signed)

	// subtract A1:A0 from accumulator A4:A3 acc -= a1*y[n-1] -> acc = A1:A0
	sub	a0,a3,a0			// A0 <- subtract acc - a1*y1 LOW
	sltu	a2,a3,a0			// A2 <- borrow, set to 1 if old_acc < new_acc (low word wrapped)
	sub	a1,a4,a1			// A1 <- subtract acc - a1*y1 HIGH
	lw	a4,BIQUAD_A2(tp)		// A4 <- a2
	sub	a1,a1,a2			// A1 <- subtract borrow, result acc = A1:A0

	// get a2*y[n-2] -> A4:A3
	mul	a3,a4,t2			// A3 <- a2*y2 low
	mulh	a4,a4,t2			// A4 <- a2*y2 high (signed)

	// subtract A4:A3 from accumulator A1:A0 acc -= a2*y[n-2] -> acc = A4:A3
	sub	a3,a0,a3			// A3 <- subtract acc - a2*y2 LOW
	sltu	a2,a0,a3			// A2 <- borrow, set to 1 if old_acc < new_acc (low word wrapped)
	sub	a4,a1,a4			// A4 <- subtract acc - a2*y2 HIGH
	sub	a4,a4,a2			// A4 <- subtract borrow, result acc = A4:A3

	// add rounding correction (half of 1 << 26) to accumulator acc A4:A3
	li	a2,1 << 25			// rounding correction
	add	a3,a2,a3			// A3 <- add rounding correction LOW
	sltu	a2,a3,a2			// A2 <- carry, set to 1 if new_acc < correction
	add	a4,a4,a2			// A4 <- add carry, result acc = A4:A3

	// shift state
	mv	s1,s0				// S1 x2 <- S0 x1
	mv	s0,a5				// S0 x1 <- A5 x
	mv	t2,t1				// T2 y2 <- T1 y1

	// convert result to Q26 -> y = T1 y1 (low 32 bits of the 64-bit acc >> 26)
	srli	a3,a3,26			// A3 <- shift acc >> 26 LOW
	slli	a4,a4,(32-26)			// A4 <- low 26 bits of HIGH moved above the 6 bits left of LOW
	or	t1,a3,a4			// T1 y1 <- y, combine LOW and HIGH parts

	// shift to next sample
	lw	a2,20(sp)			// A2 <- pointer to end of 1st part of the buffer
	bne	t0,a2,3b			// loop next sample

	// load 'max' register
	lw	ra,BIQUAD_MAX(tp)		// RA <- max

// ==== 2nd loop, with 'max' update (same filter step as the 1st loop)

	.align	2,,

	// load next sample and convert it to Q26 format -> A5
6:	lbu	a5,0(t0)			// A5 <- sample 0..255
	addi	t0,t0,1				// T0 <- shift to next sample
	addi	a5,a5,-128			// A5 <- sample - 128
	slli	a5,a5,26-7			// A5 <- x, shift to format Q26

	// get accumulator acc = b0*(x[n] - x[n-2]) -> A4:A3
	lw	a4,BIQUAD_B0(tp)		// A4 <- b0
	sub	a2,a5,s1			// A2 <- x - x2
	mul	a3,a4,a2			// A3 <- b0*(x - x2) low
	lw	a1,BIQUAD_A1(tp)		// A1 <- a1
	mulh	a4,a4,a2			// A4 <- b0*(x - x2) high (signed)

	// get a1*y[n-1] -> A1:A0
	mul	a0,a1,t1			// A0 <- a1*y1 low
	mulh	a1,a1,t1			// A1 <- a1*y1 high (signed)

	// subtract A1:A0 from accumulator A4:A3 acc -= a1*y[n-1] -> acc = A1:A0
	sub	a0,a3,a0			// A0 <- subtract acc - a1*y1 LOW
	sltu	a2,a3,a0			// A2 <- borrow, set to 1 if old_acc < new_acc (low word wrapped)
	sub	a1,a4,a1			// A1 <- subtract acc - a1*y1 HIGH
	lw	a4,BIQUAD_A2(tp)		// A4 <- a2
	sub	a1,a1,a2			// A1 <- subtract borrow, result acc = A1:A0

	// get a2*y[n-2] -> A4:A3
	mul	a3,a4,t2			// A3 <- a2*y2 low
	mulh	a4,a4,t2			// A4 <- a2*y2 high (signed)

	// subtract A4:A3 from accumulator A1:A0 acc -= a2*y[n-2] -> acc = A4:A3
	sub	a3,a0,a3			// A3 <- subtract acc - a2*y2 LOW
	sltu	a2,a0,a3			// A2 <- borrow, set to 1 if old_acc < new_acc (low word wrapped)
	sub	a4,a1,a4			// A4 <- subtract acc - a2*y2 HIGH
	sub	a4,a4,a2			// A4 <- subtract borrow, result acc = A4:A3

	// add rounding correction (half of 1 << 26) to accumulator acc A4:A3
	li	a2,1 << 25			// rounding correction
	add	a3,a2,a3			// A3 <- add rounding correction LOW
	sltu	a2,a3,a2			// A2 <- carry, set to 1 if new_acc < correction
	add	a4,a4,a2			// A4 <- add carry, result acc = A4:A3

	// shift state
	mv	s1,s0				// S1 x2 <- S0 x1
	mv	s0,a5				// S0 x1 <- A5 x
	mv	t2,t1				// T2 y2 <- T1 y1

	// convert result to Q26 -> y = T1 y1 (low 32 bits of the 64-bit acc >> 26)
	srli	a3,a3,26			// A3 <- shift acc >> 26 LOW
	slli	a4,a4,(32-26)			// A4 <- low 26 bits of HIGH moved above the 6 bits left of LOW
	or	t1,a3,a4			// T1 y1 <- y, combine LOW and HIGH parts

	// get max. value (positive half-wave is enough)
	ble	t1,ra,7f			// skip if y <= max (signed compare)
	mv	ra,t1				// RA <- y

	// shift to next sample
7:	lw	a2,24(sp)			// A2 <- pointer to end of 2nd part of the buffer
	bne	t0,a2,6b			// loop next sample

	// save registers to the filter
	sw	ra,BIQUAD_MAX(tp)		// save 'max'
	sw	t1,BIQUAD_Y1(tp)		// save Y1
	sw	t2,BIQUAD_Y2(tp)		// save Y2
	sw	s0,BIQUAD_X1(tp)		// save X1
	sw	s1,BIQUAD_X2(tp)		// save X2

	// shift to next filter
	addi	tp,tp,BIQUAD_SIZE		// TP <- next filter
	lw	a2,28(sp)			// A2 <- end of filters
	bne	tp,a2,2b			// loop next filter

	// restore registers
	lw	tp,12(sp)
	lw	s1,8(sp)
	lw	s0,4(sp)
	lw	ra,0(sp)
	add	sp,sp,32
	ret

// ============================================================================
//                   PWM Timer 2 interrupt handler - Sine
// ============================================================================
// HANDLER void PWM_Handler_Sine()
//
// Outputs the sample prepared by the previous interrupt (PwmNext) to the
// Timer 2 compare register 1, advances the phase accumulator
// (PwmPhaseAcc += PwmPhaseInc) and prepares the next sine sample in PwmNext.
//
// The top 2 bits of the accumulator select the quadrant, the next 7 bits are
// the index into the quarter-wave table PWM_SinTab (128 entries of u16).
// Destroys A0-A4, returns with MRET.

	.section .text

	.align	4,,
.global PWM_Handler_Sine
PWM_Handler_Sine:

	// send the prepared sample to the timer and acknowledge the interrupt
	lui	a1,%hi(PwmNext)			// A1 <- high part of PwmNext address (kept until the end)
	lw	a2,%lo(PwmNext)(a1)		// A2 <- prepared PWM sample
	li	a0,TIM2_BASE			// A0 <- Timer 2 base
	sw	a2,TIM_CH1CVR_OFF(a0)		// compare/capture register 1 <- sample
	sw	zero,TIM_INTFR_OFF(a0)		// clear interrupt flags

	// PwmPhaseAcc += PwmPhaseInc
	lui	a0,%hi(PwmPhaseAcc)
	lui	a3,%hi(PwmPhaseInc)
	lw	a2,%lo(PwmPhaseAcc)(a0)		// A2 <- phase accumulator
	lw	a3,%lo(PwmPhaseInc)(a3)		// A3 <- phase increment
	add	a2,a2,a3			// A2 <- new phase
	sw	a2,%lo(PwmPhaseAcc)(a0)		// store new phase

	// split the phase: quadrant -> A3, 9-bit phase -> A2
	srli	a3,a2,32 - 2			// A3 <- quadrant 0..3
	srli	a2,a2,32 - 9			// A2 <- phase 0..511 (low 7 bits = table index)

	// odd quadrant (pi/2..pi or pi*3/2..2*pi): walk the table backwards
	andi	a4,a3,1				// A4 <- odd quadrant?
	beqz	a4,1f				// even quadrant: index is used as it is
	not	a2,a2				// low 7 bits become 127 - index (only they survive the mask below)

	// read quarter-wave sine value 0..65535
1:	andi	a2,a2,0x7f			// A2 <- table index 0..127
	la	a4,PWM_SinTab			// A4 <- sine table
	slli	a2,a2,1				// A2 <- byte offset of the u16 entry
	add	a2,a2,a4			// A2 <- address of the entry
	lhu	a2,0(a2)			// A2 <- sine value 0..65535

	// quadrant 2 or 3 (pi..2*pi): negative half-wave
	andi	a3,a3,2				// A3 <- quadrant 2 or 3?
	beqz	a3,2f				// positive half-wave: keep the sign
	neg	a2,a2				// A2 <- -value, range is now -65535..+65535

	// convert range -65535..+65535 to range 0..LOOP-1 ... PWM_LOOP = 128 = 1 << 7
	// s = ((s + 65535) * PWM_LOOP) >> 17;
2:	li	a3,65535
	add	a2,a2,a3			// A2 <- value + 65535, range 0..131070
	srai	a2,a2,17 - 7			// A2 <- range 0..LOOP-1

	// keep the sample for the next interrupt
	sw	a2,%lo(PwmNext)(a1)		// PwmNext <- next PWM sample
	mret

// ============================================================================
//                PWM Timer 2 interrupt handler - Triangle
// ============================================================================
// HANDLER void PWM_Handler_Tri()
//
// Outputs the sample prepared by the previous interrupt (PwmNext) to the
// Timer 2 compare register 1, advances the phase accumulator
// (PwmPhaseAcc += PwmPhaseInc) and prepares the next triangle sample:
// the upper 16 bits of the phase, folded back in the second half of the
// period, scaled to 0..LOOP-1 (PWM_LOOP = 128 = 1 << 7).
// Destroys A0-A3, returns with MRET.

	.section .text

	.align	4,,
.global PWM_Handler_Tri
PWM_Handler_Tri:

	// send the prepared sample to the timer and acknowledge the interrupt
	lui	a1,%hi(PwmNext)			// A1 <- high part of PwmNext address (kept until the end)
	lw	a2,%lo(PwmNext)(a1)		// A2 <- prepared PWM sample
	li	a0,TIM2_BASE			// A0 <- Timer 2 base
	sw	a2,TIM_CH1CVR_OFF(a0)		// compare/capture register 1 <- sample
	sw	zero,TIM_INTFR_OFF(a0)		// clear interrupt flags

	// PwmPhaseAcc += PwmPhaseInc
	lui	a0,%hi(PwmPhaseAcc)
	lui	a3,%hi(PwmPhaseInc)
	lw	a2,%lo(PwmPhaseAcc)(a0)		// A2 <- phase accumulator
	lw	a3,%lo(PwmPhaseInc)(a3)		// A3 <- phase increment
	add	a2,a2,a3			// A2 <- new phase
	sw	a2,%lo(PwmPhaseAcc)(a0)		// store new phase

	// fold the second half of the period: there the upper 16 bits are
	// >= 32768 (bit 31 set) and inverting the phase gives 65535 - upper16
	bgez	a2,1f				// first half: upper 16 bits are 0..32767 already
	not	a2,a2				// second half: upper 16 bits <- 65535 - upper 16 bits
1:	srli	a2,a2,16			// A2 <- folded phase 0..32767

	// convert range 0..32767 to range 0..LOOP-1
	// int s = (phase * PWM_LOOP) >> 15;
	srli	a2,a2,15 - 7			// A2 <- range 0..LOOP-1 (value is not negative here)

	// keep the sample for the next interrupt
	sw	a2,%lo(PwmNext)(a1)		// PwmNext <- next PWM sample
	mret

// ============================================================================
//                PWM Timer 2 interrupt handler - Saw
// ============================================================================
// HANDLER void PWM_Handler_Saw()
//
// Outputs the sample prepared by the previous interrupt (PwmNext) to the
// Timer 2 compare register 1, advances the phase accumulator
// (PwmPhaseAcc += PwmPhaseInc) and prepares the next saw sample: the top
// 7 bits of the phase, i.e. range 0..LOOP-1 (PWM_LOOP = 128 = 1 << 7).
// Destroys A0-A3, returns with MRET.

	.section .text

	.align	4,,
.global PWM_Handler_Saw
PWM_Handler_Saw:

	// send the prepared sample to the timer and acknowledge the interrupt
	lui	a1,%hi(PwmNext)			// A1 <- high part of PwmNext address (kept until the end)
	lw	a2,%lo(PwmNext)(a1)		// A2 <- prepared PWM sample
	li	a0,TIM2_BASE			// A0 <- Timer 2 base
	sw	a2,TIM_CH1CVR_OFF(a0)		// compare/capture register 1 <- sample
	sw	zero,TIM_INTFR_OFF(a0)		// clear interrupt flags

	// PwmPhaseAcc += PwmPhaseInc
	lui	a0,%hi(PwmPhaseAcc)
	lui	a3,%hi(PwmPhaseInc)
	lw	a2,%lo(PwmPhaseAcc)(a0)		// A2 <- phase accumulator
	lw	a3,%lo(PwmPhaseInc)(a3)		// A3 <- phase increment
	add	a2,a2,a3			// A2 <- new phase
	sw	a2,%lo(PwmPhaseAcc)(a0)		// store new phase

	// int phase = acc >> 16;		// value 0..65535
	// int s = (phase * PWM_LOOP) >> 16;	// value 0..LOOP-1
	// both steps together are one shift by 16 + 16 - 7 bits
	srli	a2,a2,16 + 16 - 7		// A2 <- top 7 bits of the phase

	// keep the sample for the next interrupt
	sw	a2,%lo(PwmNext)(a1)		// PwmNext <- next PWM sample
	mret

// ============================================================================
//            FT Timer 1 interrupt handler - overflow of edge counter
// ============================================================================
// HANDLER void TIM1_UP_IRQHandler()
// HANDLER void FT_Handler1()
//
// Clears the Timer 1 interrupt flags and increments the 32-bit variable
// FT_CntHigh (high part of the pulse counter).
// Destroys A0-A2, returns with MRET.

	.section .text

	.align	4,,
.global FT_Handler1
FT_Handler1:

	// FT_CntHigh++, with the interrupt acknowledge placed between load and store
	lui	a0,%hi(FT_CntHigh)
	li	a2,TIM1_BASE			// A2 <- Timer 1 base
	lw	a1,%lo(FT_CntHigh)(a0)		// A1 <- high counter
	sw	zero,TIM_INTFR_OFF(a2)		// clear interrupt flags
	addi	a1,a1,1				// A1 <- high counter + 1
	sw	a1,%lo(FT_CntHigh)(a0)		// store new high counter
	mret

// ============================================================================
//              FT Timer 2 interrupt handler - time of edges
// ============================================================================
// HANDLER void TIM2_IRQHandler()
// HANDLER void FT_Handler2()
//
// Timer 2 channel 3 capture interrupt. Extends the captured value to a 32-bit
// time stamp using the SysTick counter (edge = t + (s16)(capture - t)), stores
// it to FT_Edge2 and increments FT_EdgeNum. When the new FT_EdgeNum reaches
// COUNT_OUT (unsigned compare), bit 3 of the Timer 2 DMA/interrupt enable
// register is cleared so that this interrupt stops.
// Destroys A0-A4, returns with MRET.

	.section .text

	.align	4,,
.global FT_Handler2
FT_Handler2:

	// read capture result and acknowledge the interrupt
	li	a0,TIM2_BASE			// A0 <- Timer 2 base (kept until the end)
	lw	a1,TIM_CH3CVR_OFF(a0)		// A1 <- capture result
	sw	zero,TIM_INTFR_OFF(a0)		// clear interrupt flags

	// current time 't' -> A2
	li	a2,SYSTICK_BASE			// A2 <- SysTick base
	lw	a2,SYSTICK_CNT_OFF(a2)		// A2 <- SysTick counter

	// edge time = t + (s16)(capture - t) -> A1
	sub	a1,a1,a2			// A1 <- capture - t
	slli	a1,a1,16
	srai	a1,a1,16			// A1 <- low 16 bits sign-extended
	add	a1,a1,a2			// A1 <- edge time

	// FT_Edge2 = edge time; FT_EdgeNum++
	lui	a4,%hi(FT_Edge2)
	lui	a2,%hi(FT_EdgeNum)
	lw	a3,%lo(FT_EdgeNum)(a2)		// A3 <- current number of edges
	sw	a1,%lo(FT_Edge2)(a4)		// store time of last edge
	addi	a3,a3,1				// A3 <- number of edges + 1
	sw	a3,%lo(FT_EdgeNum)(a2)		// store new number of edges

	// overload check: too many edges in this measure window?
	li	a4,COUNT_OUT			// A4 <- edge limit
	bgeu	a3,a4,1f			// limit reached: switch this interrupt off
	mret

	// disable the channel 3 capture interrupt (bit 3 of the enable register)
1:	lw	a1,TIM_DMAINTENR_OFF(a0)	// A1 <- interrupt enable register
	andi	a1,a1,~8			// clear bit 3
	sw	a1,TIM_DMAINTENR_OFF(a0)	// store new interrupt enable register
	mret

// ============================================================================
//            CNT Timer 1 interrupt handler - overflow of edge counter
// ============================================================================
// HANDLER void TIM1_UP_IRQHandler()
// HANDLER void CNT_Handler()
//
// Clears the Timer 1 interrupt flags and increments the 32-bit variable
// CntHigh (high part of the pulse counter).
// Destroys A0-A2, returns with MRET.

	.section .text

	.align	4,,
.global CNT_Handler
CNT_Handler:

	// CntHigh++, with the interrupt acknowledge placed between load and store
	lui	a0,%hi(CntHigh)
	li	a2,TIM1_BASE			// A2 <- Timer 1 base
	lw	a1,%lo(CntHigh)(a0)		// A1 <- high counter
	sw	zero,TIM_INTFR_OFF(a2)		// clear interrupt flags
	addi	a1,a1,1				// A1 <- high counter + 1
	sw	a1,%lo(CntHigh)(a0)		// store new high counter
	mret

// ============================================================================
//                  L Timer 2 interrupt handler
// ============================================================================
// HANDLER void TIM2_IRQHandler()
// HANDLER void L_Handler()
//
// Clears the Timer 2 interrupt flags and increments the 32-bit variable
// LHigh (high part of the counter).
// Destroys A0-A2, returns with MRET.

	.section .text

	.align	4,,
.global L_Handler
L_Handler:

	// LHigh++, with the interrupt acknowledge placed between load and store
	lui	a0,%hi(LHigh)
	li	a2,TIM2_BASE			// A2 <- Timer 2 base
	lw	a1,%lo(LHigh)(a0)		// A1 <- high counter
	sw	zero,TIM_INTFR_OFF(a2)		// clear interrupt flags
	addi	a1,a1,1				// A1 <- high counter + 1
	sw	a1,%lo(LHigh)(a0)		// store new high counter
	mret

// ============================================================================
//                TG Timer 1 interrupt handler (at 100kHz)
// ============================================================================
// HANDLER void TIM1_UP_IRQHandler()
// HANDLER void TG_Handler()
//
// Clears the Timer 1 interrupt flags, samples input pin PD2 and increments
// TGCounterH if the pin is HIGH, or TGCounterL if it is LOW.
// Destroys A0-A1, returns with MRET.

	.section .text

	.align	4,,
.global TG_Handler
TG_Handler:

	// acknowledge the interrupt
	li	a0,TIM1_BASE			// A0 <- Timer 1 base
	sw	zero,TIM_INTFR_OFF(a0)		// clear interrupt flags

	// sample pin PD2 -> A1 (non-zero = HIGH)
	li	a0,GPIOD_BASE			// A0 <- PD port base
	lw	a1,GPIO_INDR_OFF(a0)		// A1 <- PD port input
	andi	a1,a1,1 << 2			// A1 <- isolate pin 2

	// select the counter to increase -> A0
	la	a0,TGCounterL			// A0 <- LOW counter (pin is LOW)
	beqz	a1,1f
	la	a0,TGCounterH			// A0 <- HIGH counter (pin is HIGH)

	// increase the selected counter
1:	lw	a1,0(a0)			// A1 <- current counter
	addi	a1,a1,1				// A1 <- counter + 1
	sw	a1,0(a0)			// store new counter
	mret

// ============================================================================
//                USART2 interrupt on receive character
// ============================================================================
// HANDLER void USART2_IRQHandler()
// HANDLER void COM_Handler()
//
// Reads the USART2 status register (dummy read) and then the received
// character, and puts the character into the "sCOMScreen" screen buffer:
//  - CR (13): length of the current row is reset to 0,
//  - LF (10): current row = (row + 1) & (COM_SCREENH-1), its length is reset,
//  - other:   if the current row already holds COM_SCREENW or more characters
//             (this includes the 0xff "not initialized" mark), the row is
//             advanced and its length reset first; then the character is
//             stored at the end of the row and the row length is increased.
// The row offset in the screen data is (row << 4), i.e. 16 positions per row.
// Destroys A0-A5, returns with MRET.

	.section .text

	.align	4,,
.global COM_Handler
COM_Handler:

	// read status (dummy read to clear error flags), then the character -> A0
	li	a0,USART2_BASE			// A0 <- USART2 base
	lw	a1,USART_STATR_OFF(a0)		// dummy read of status register
	lw	a0,USART_DATAR_OFF(a0)		// A0 <- data register
	la	a1,COMScreen			// A1 <- screen buffer structure
	andi	a0,a0,0xff			// A0 <- received character (low byte)
	lw	a2,COM_SCREEN_ROW(a1)		// A2 <- current row

	// control characters
	li	a3,13
	beq	a0,a3,5f			// CR: reset length of current row
	li	a3,10
	beq	a0,a3,4f			// LF: go to next row

	// other character: length of current row -> A3
	add	a4,a1,a2			// A4 <- structure + row (base of the row's length byte)
	lbu	a3,COM_SCREEN_LEN(a4)		// A3 <- length of current row
	li	a5,COM_SCREENW			// A5 <- screen width
	bge	a3,a5,3f			// row is full: open next row first

	// store character at position A3 of row A2, row length++
2:	slli	a5,a2,4				// A5 <- offset of the row in screen data
	add	a5,a5,a1			// A5 <- + structure address
	add	a5,a5,a3			// A5 <- + position in the row
	sb	a0,COM_SCREEN_BUF(a5)		// store character
	addi	a3,a3,1				// A3 <- length + 1
	sb	a3,COM_SCREEN_LEN(a4)		// store new length of current row
	mret

	// row is full: advance to next row, start it at position 0
3:	addi	a2,a2,1				// A2 <- row + 1
	andi	a2,a2,COM_SCREENH-1		// wrap row index
	sw	a2,COM_SCREEN_ROW(a1)		// store new row index
	add	a4,a1,a2			// A4 <- base of the new row's length byte
	sb	zero,COM_SCREEN_LEN(a4)		// reset length of the new row
	li	a3,0				// A3 <- position 0
	j	2b				// store the character

	// LF character: advance to next row ...
4:	addi	a2,a2,1				// A2 <- row + 1
	andi	a2,a2,COM_SCREENH-1		// wrap row index
	sw	a2,COM_SCREEN_ROW(a1)		// store new row index

	// ... CR character (and LF): reset length of current row
5:	add	a1,a1,a2			// A1 <- base of the row's length byte
	sb	zero,COM_SCREEN_LEN(a1)		// length of current row <- 0
	mret

// ============================================================================
//                      NA Timer 2 interrupt handler
// ============================================================================
// HANDLER void TIM2_IRQHandler()
// HANDLER void NA_Handler()
//
// Outputs the sample prepared by the previous interrupt (NANext) to the
// Timer 2 compare register 1, advances the phase accumulator
// (NAPhaseAcc += NAPhaseInc) and prepares the next sample by linear
// interpolation between two neighbouring bytes of the buffer Buf:
//
//     off    = (acc >> NA_STEPBITS) & NA_BUFMASK
//     s1     = Buf[off];  s2 = Buf[(off + 1) & NA_BUFMASK]
//     NANext = s1 + (((s2 - s1) * (acc & NA_PHASEMASK)) >> NA_STEPBITS)
//
// The product is signed (negative on a falling slope, s2 < s1), so the final
// shift must be arithmetic.
// Destroys A0-A5, returns with MRET.

	.section .text

	.align	4,,
.global NA_Handler
NA_Handler:

	// set next sample
	lui	a4,%hi(NANext)			// A4 <- high part of NANext address (kept until the end)
	lw	a3,%lo(NANext)(a4)		// A3 <- next NA sample
	li	a5,TIM2_BASE			// A5 <- Timer 2 base
	sw	a3,TIM_CH1CVR_OFF(a5)		// set compare/capture register 1

	// clear interrupt flag
	sw	zero,TIM_INTFR_OFF(a5)		// clear interrupt flag

	// shift phase accumulator, acc -> A3
	lui	a5,%hi(NAPhaseAcc)
	lui	a2,%hi(NAPhaseInc)
	lw	a3,%lo(NAPhaseAcc)(a5)		// A3 <- phase accumulator
	lw	a2,%lo(NAPhaseInc)(a2)		// A2 <- phase increment
	la	a1,Buf				// A1 <- pointer to buffer
	add	a3,a3,a2			// shift phase accumulator
	sw	a3,%lo(NAPhaseAcc)(a5)		// set new accumulator

	// get offset to buffer -> A5
	li	a0,NA_BUFMASK			// A0 <- buffer index mask NA_BUFMASK
	srli	a5,a3,NA_STEPBITS		// A5 <- buffer index (integer part of the accumulator)
	and	a5,a5,a0			// A5 <- mask buffer index = off

	// get first sample -> A2
	add	a2,a5,a1			// A2 <- address in buffer = Buf + off
	lbu	a2,0(a2)			// A2 <- load first sample s1 (0..255)

	// get second sample -> A5
	addi	a5,a5,1				// A5 <- next index
	and	a5,a5,a0			// A5 <- mask next buffer index (wraps to start of buffer)
	add	a5,a5,a1			// A5 <- address of next entry in buffer
	lbu	a5,0(a5)			// A5 <- load second sample s2 (0..255)

	// interpolation
	andi	a3,a3,NA_PHASEMASK		// A3 <- mask fraction part of the accumulator
	sub	a5,a5,a2			// A5 <- s2 - s1 (signed, -255..+255)
	mul	a5,a5,a3			// A5 <- (s2 - s1) * fraction (signed)
	// Arithmetic shift keeps the sign of the product. A logical shift would
	// fill bits 22..31 of a negative result with zeros and leave a garbage
	// upper part in NANext (only the low 22 bits would be correct).
	srai	a5,a5,NA_STEPBITS		// A5 <- shift result, sign preserved
	add	a5,a5,a2			// A5 <- add 1st sample

	// save next sample
	sw	a5,%lo(NANext)(a4)		// save next NA sample
	mret

// ============================================================================
//                     PH - interrupt on signal edge
// ============================================================================
// HANDLER void NOFLASH(TIM1_CC_IRQHandler)()
// HANDLER void NOFLASH(PH_Handler)()
//
// Timer 1 capture interrupt: builds a histogram of pulse lengths in Buf.
//
//  - pulse = (u16)(capture - PH_Last); PH_Last = (u16)capture
//  - histogram index:
//       pulse < 16:  index = pulse
//       otherwise:   ord = 32 - clz(pulse)          (number of significant bits)
//                    index = 16 + ((ord - 5) << 3) + ((pulse >> (ord - 4)) & 7)
//    i.e. 8 bins per power of two, selected by the 3 bits below the top bit
//  - bit 1 of the TIM1 CCER register (edge polarity) is toggled; if it was set
//    before the toggle (falling edge, end of HIGH pulse), 128 is added to the index
//  - the 32-bit counter Buf[index] is incremented
//
// Calls __clzsi2, so every register that function may destroy is destroyed
// here too; 4 bytes of stack are used temporarily. Returns with MRET.
// NOTE(review): the stack pointer is moved by only 4 bytes around the call -
// confirm that the target ABI does not require a larger stack alignment.

	.section .time_critical, "ax"

	.align	4,,
.global PH_Handler
PH_Handler:

	// prepare TIM1 base -> A5
	li	a5,TIM1_BASE		// A5 <- TIM1 base

	// read capture result -> A4
	lw	a4,TIM_CH1CVR_OFF(a5)	// A4 <- capture result

	// clear interrupt request
	sw	zero,TIM_INTFR_OFF(a5)	// clear interrupt status register (all flags)

	// get difference from previous edge -> A0
	lui	a5,%hi(PH_Last)
	lhu	a0,%lo(PH_Last)(a5)	// A0 <- get last time (16 bits)
	sh	a4,%lo(PH_Last)(a5)	// save last time (low 16 bits of capture)
	sub	a0,a4,a0		// A0 <- time difference
	slli	a0,a0,16
	srli	a0,a0,16		// A0 <- limit to u16

	// check short pulse -> A0 pulse index (pulses 0..15 are used as index directly)
	li	a5,16
	bltu	a0,a5,4f		// skip if pulse is < 16

	// save pulse to the stack (A0 is overwritten by the result of the call)
	add	sp,sp,-4
	sw	a0,0(sp)		// save A0

	// count leading zeros of the pulse length -> A0
	call	__clzsi2		// call __clzsi2 function
// Other alternative:
// 1:	auipc	ra,%pcrel_hi(__clzsi2)	// RA <- PC + hi(__clzsi2) pointer to the function
//	jalr	%pcrel_lo(1b)(ra)	// call __clzsi2 function

	// restore pulse -> A1
	lw	a1,0(sp)		// A1 <- pulse length
	add	sp,sp,4

	// get base index (first value 16: clz = 27, ord = 32 - 27 = 5)
	li	a5,32-5			// 27
	sub	a5,a5,a0		// A5 <- ord - 5
	slli	a5,a5,3			// A5 <- (ord - 5) << 3
	addi	a5,a5,16		// A5 <- base index

	// add linear 3-bit part -> A0
	li	a4,32-4			// 28
	sub	a4,a4,a0		// A4 <- ord - 4
	sra	a0,a1,a4		// A0 <- pulse length >> (ord - 4)
	andi	a0,a0,7			// A0 <- linear 3-bit part (3 bits below the top bit)
	add	a0,a0,a5		// A0 <- index

	// prepare TIM1 base -> A5
4:	li	a5,TIM1_BASE		// A5 <- TIM1 base

	// flip edge
	lw	a3,TIM_CCER_OFF(a5)	// A3 <- get capture enable register
	li	a2,1<<1			// A2 <- bit 1 (flag of falling edge)
	xor	a1,a3,a2		// A1 <- change edge flag
	sw	a1,TIM_CCER_OFF(a5)	// set new flag

	// falling edge - end of HIGH pulse
	and	a1,a3,a2		// check falling edge (tested on the OLD CCER value)
	beqz	a1,2f			// skip if rising edge (flag is 0)

	// count HIGH pulse - shift to HIGH buffer
	addi	a0,a0,128		// shift index to HIGH buffer

	// prepare buffer address -> A5
2:	la	a5,Buf			// A5 <- base buffer address
	slli	a0,a0,2			// A0 <- convert index to offset (32-bit counters)
	add	a5,a5,a0		// A5 <- address in buffer
	lw	a4,0(a5)		// A4 <- load current counter
	addi	a4,a4,1			// A4 <- increment counter
	sw	a4,0(a5)		// save new counter
	mret

// ============================================================================
//                     REP - interrupt on signal edge
// ============================================================================
// HANDLER void TIM1_CC_IRQHandler()
// HANDLER void REP_Handler()
//
// Timer 1 capture interrupt: records the capture value of every edge.
// Reads the TIM1 channel 1 capture value, clears the TIM1 interrupt flags and
// toggles bit 1 of the TIM1 CCER register (edge polarity). If the sample index
// REPSamples is below REP_BUFSIZE (signed compare), the low 16 bits of the
// capture value are stored to Buf[REPSamples] (u16 entries) and REPSamples is
// incremented; otherwise the sample is dropped.
// Destroys A0-A4, returns with MRET.

	.section .text

	.align	4,,
.global REP_Handler
REP_Handler:

	// read capture result -> A1 and acknowledge the interrupt
	li	a0,TIM1_BASE		// A0 <- TIM1 base
	lw	a1,TIM_CH1CVR_OFF(a0)	// A1 <- capture result
	sw	zero,TIM_INTFR_OFF(a0)	// clear interrupt status register

	// wait for the opposite edge next time
	lw	a2,TIM_CCER_OFF(a0)	// A2 <- capture enable register
	lui	a3,%hi(REPSamples)	// A3 <- high part of REPSamples address
	xori	a2,a2,1 << 1		// A2 <- flip edge flag (bit 1)
	sw	a2,TIM_CCER_OFF(a0)	// store new edge flag

	// is there room for one more sample?
	lw	a2,%lo(REPSamples)(a3)	// A2 <- current store index
	li	a4,REP_BUFSIZE		// A4 <- buffer size in samples
	blt	a2,a4,1f		// index below the limit: store the sample
	mret				// buffer is full: drop the sample

	// Buf[index] = (u16)capture; index++
1:	la	a0,Buf			// A0 <- buffer address
	slli	a4,a2,1			// A4 <- byte offset of the sample
	add	a4,a4,a0		// A4 <- address of the sample
	sh	a1,0(a4)		// store new sample (low 16 bits)
	addi	a2,a2,1			// A2 <- next sample index
	sw	a2,%lo(REPSamples)(a3)	// store new index
	mret
