
// ****************************************************************************
//
//                                 Main code
//
// ****************************************************************************

#include "include.h"

/*
Instruction timings on 125MHz
=============================
Move
        MOVS Rd,#<imm>   1 clk (8 ns)
        MOVS Rd,Rm       1 clk (8 ns)
        MOV Rd,Rm        1 clk (8 ns)
        MOV PC,Rm        2 clk (16 ns)
Add
        ADDS Rd,Rn,#<imm>        1 clk (8 ns)
        ADDS Rd,Rn,Rm    1 clk (8 ns)
        ADD Rd,Rm        1 clk (8 ns)
        ADD PC,Rm        2 clk (16 ns)
        ADDS Rd,#<imm>   1 clk (8 ns)
        ADCS Rd,Rm       1 clk (8 ns)
        ADD SP,#<imm>    1 clk (8 ns)
        ADD Rd,SP,#<imm>         1 clk (8 ns)
        ADR Rd,<label>   1 clk (8 ns)
Subtract
        SUBS Rd,Rn,Rm    1 clk (8 ns)
        SUBS Rd,Rn,#<imm>        1 clk (8 ns)
        SUBS Rd,#<imm>   1 clk (8 ns)
        SBCS Rd,Rm       1 clk (8 ns)
        SUB SP,#<imm>    1 clk (8 ns)
        NEGS Rd,Rn       1 clk (8 ns)
Multiply
        MULS Rd,Rn       1 clk (8 ns)
Compare
        CMP Rn,Rm        1 clk (8 ns)
        CMN Rn,Rm        1 clk (8 ns)
        CMP Rn,#<imm>    1 clk (8 ns)
Logical
        ANDS Rd,Rm       1 clk (8 ns)
        EORS Rd,Rm       1 clk (8 ns)
        ORRS Rd,Rm       1 clk (8 ns)
        BICS Rd,Rm       1 clk (8 ns)
        MVNS Rd,Rm       1 clk (8 ns)
        TST Rn,Rm        1 clk (8 ns)
Shift
        LSLS Rd,Rm,#<shift>      1 clk (8 ns)
        LSLS Rd,Rs       1 clk (8 ns)
        LSRS Rd,Rm,#<shift>      1 clk (8 ns)
        LSRS Rd,Rs       1 clk (8 ns)
        ASRS Rd,Rm,#<shift>      1 clk (8 ns)
        ASRS Rd,Rs       1 clk (8 ns)
Rotate
        RORS Rd,Rs       1 clk (8 ns)
Load RAM
        LDR Rd,[Rn,#<imm>]       2 clk (16 ns)
        LDRH Rd,[Rn,#<imm>]      2 clk (16 ns)
        LDRB Rd,[Rn,#<imm>]      2 clk (16 ns)
        LDR Rd,[Rn,Rm]   2 clk (16 ns)
        LDRH Rd,[Rn,Rm]  2 clk (16 ns)
        LDRSH Rd,[Rn,Rm]         2 clk (16 ns)
        LDRB Rd,[Rn,Rm]  2 clk (16 ns)
        LDRSB Rd,[Rn,Rm]         2 clk (16 ns)
        LDR Rd,<label>   2 clk (16 ns)
        LDR Rd,[SP,#<imm>]       2 clk (16 ns)
        LDM Rn!,{<list>}         4 clk (32 ns)
        LDM Rn,{<list>}  5 clk (40 ns)
Load SIO
        LDR Rd,[Rn,#<imm>]       1 clk (8 ns)
        LDRH Rd,[Rn,#<imm>]      1 clk (8 ns)
        LDRB Rd,[Rn,#<imm>]      1 clk (8 ns)
        LDR Rd,[Rn,Rm]   1 clk (8 ns)
        LDRH Rd,[Rn,Rm]  1 clk (8 ns)
        LDRSH Rd,[Rn,Rm]         1 clk (8 ns)
        LDRB Rd,[Rn,Rm]  1 clk (8 ns)
        LDRSB Rd,[Rn,Rm]         1 clk (8 ns)
Store RAM
        STR Rd,[Rn,#<imm>]       2 clk (16 ns)
        STRH Rd,[Rn,#<imm>]      2 clk (16 ns)
        STRB Rd,[Rn,#<imm>]      2 clk (16 ns)
        STR Rd,[Rn,Rm]   2 clk (16 ns)
        STRH Rd,[Rn,Rm]  2 clk (16 ns)
        STRB Rd,[Rn,Rm]  2 clk (16 ns)
        STR Rd,[SP,#<imm>]       2 clk (16 ns)
        STM Rn!,{<list>}         4 clk (32 ns)
Store SIO
        STR Rd,[Rn,#<imm>]       1 clk (8 ns)
        STRH Rd,[Rn,#<imm>]      1 clk (8 ns)
        STRB Rd,[Rn,#<imm>]      1 clk (8 ns)
        STR Rd,[Rn,Rm]   1 clk (8 ns)
        STRH Rd,[Rn,Rm]  1 clk (8 ns)
        STRB Rd,[Rn,Rm]  1 clk (8 ns)
Push
        PUSH {<list>}    2 clk (16 ns)
        PUSH {<list>,LR}         2 clk (16 ns)
Pop
        POP {<list>}     2 clk (16 ns)
        POP {PC}         4 clk (32 ns)
Branch
        B<cc> <label> not        1 clk (8 ns)
        B<cc> <label> taken      2 clk (16 ns)
        B <label>        2 clk (16 ns)
        BL <label>       3 clk (24 ns)
        BX Rm    2 clk (16 ns)
        BLX Rm   2 clk (16 ns)
Extend
        SXTH Rd,Rm       1 clk (8 ns)
        SXTB Rd,Rm       1 clk (8 ns)
        UXTH Rd,Rm       1 clk (8 ns)
        UXTB Rd,Rm       1 clk (8 ns)
Reverse
        REV Rd,Rm        1 clk (8 ns)
        REV16 Rd,Rm      1 clk (8 ns)
        REVSH Rd,Rm      1 clk (8 ns)
State
        CPSID i  1 clk (8 ns)
        CPSIE i  1 clk (8 ns)
        MRS Rd,<reg>     3 clk (24 ns)
        MSR <reg>,Rn     3 clk (24 ns)
Hint
        SEV      1 clk (8 ns)
        WFE      2 clk (16 ns)
        YIELD    1 clk (8 ns)
        NOP      1 clk (8 ns)
Barriers
        ISB      3 clk (24 ns)
        DMB      3 clk (24 ns)
        DSB      3 clk (24 ns)
*/

// sys_clk frequency in kHz; written once in main() from the frequency
// counter and read by TestCalc() to convert measured time into clock ticks
u32 khz;

// pointer to a test routine; Test1()/Test2() call it with no arguments
typedef void (*testfnc)();

// Convert a measured time into clock ticks per instruction and print one result line.
//  text ... instruction name printed in the result line
//  rept ... number of tested instructions executed by one call of the test routine
//  td ..... total time of 1000 calls of the test routine, in [us]
//  subs ... additional ticks subtracted from the per-instruction result
//           (presumably helper instructions inside the test routine - verify against the routines)
NOINLINE void TestCalc(const char* text, int rept, int td, int subs)
{
	// the caller runs 1000 calls and measures them in [us],
	// so the same number is the time of one call in [ns]
	double ns = (double)td;

	// get sys_clk ticks of one call (ns * khz / 1000000)
	// - subtract 4 ticks per "for" loop (subs #1 [1], cmp #0 [1], bne loop [2,1])
	// - subtract 3 ticks per "BL" instruction
	// - subtract 2 ticks per "BX LR" instruction
	double clk = ns/1000000*khz - 4 - 3 - 2;

	// clock ticks per instruction, rounded to nearest
	int i = (int)(clk/rept + 0.5) - subs;

	// duration of one sys_clk tick in [ns], derived from the measured frequency
	// (8 ns at 125 MHz); 0 if the frequency is unknown, to avoid division by zero
	double tick_ns = (khz != 0) ? 1000000.0/khz : 0.0;

	// instruction time in [ns], rounded to nearest (half away from zero)
	double t = i*tick_ns;
	int ins_ns = (int)((t < 0) ? (t - 0.5) : (t + 0.5));

	// print
	printf("\t%s\t %d clk (%d ns)\n", text, i, ins_ns);

	// short pause after each line (presumably to let the output drain)
	sleep_ms(10);
}

// test one instruction
// Test one instruction: run the test routine 1000 times, measure the total
// time and pass it to TestCalc() with REPT as the repetition count.
//  text ... instruction name printed in the result line
//  fnc .... test routine to call
//  subs ... additional ticks subtracted from the result (passed to TestCalc)
INLINE void Test1(const char* text, testfnc fnc, int subs)
{
	int i;
	unsigned start, delta;

	// measure time delta in [us]
	// - only low 32 bits of the timer are used; unsigned subtraction wraps
	//   correctly, while signed subtraction would overflow (undefined behavior)
	//   when the counter crosses 2^31 us (about 35.8 minutes after start)
	// - the loop shape is kept as is; TestCalc() subtracts its overhead
	start = (unsigned)time_us_64();
	for (i = 1000; i > 0; i--) fnc();
	delta = (unsigned)time_us_64() - start;

	TestCalc(text, REPT, (int)delta, subs);
}

// test one instruction MOV PC,r
// Test one instruction with 10 repetitions per call of the test routine
// (used in main() for MOV PC, ADD PC, POP {PC}, BX and BLX): run the test
// routine 1000 times, measure the total time and pass it to TestCalc().
//  text ... instruction name printed in the result line
//  fnc .... test routine to call
//  subs ... additional ticks subtracted from the result (passed to TestCalc)
INLINE void Test2(const char* text, testfnc fnc, int subs)
{
	int i;
	unsigned start, delta;

	// measure time delta in [us]
	// - only low 32 bits of the timer are used; unsigned subtraction wraps
	//   correctly, while signed subtraction would overflow (undefined behavior)
	//   when the counter crosses 2^31 us (about 35.8 minutes after start)
	// - the loop shape is kept as is; TestCalc() subtracts its overhead
	start = (unsigned)time_us_64();
	for (i = 1000; i > 0; i--) fnc();
	delta = (unsigned)time_us_64() - start;

	TestCalc(text, 10, (int)delta, subs);
}

// Program entry: measure the system clock, initialize STDIO and then
// repeatedly print the timing table of all tested instructions,
// waiting 10 seconds between passes. Never returns.
int main()
{
	// get system frequency (used by TestCalc to convert time to clock ticks)
	khz = frequency_count_khz(CLOCKS_FC0_SRC_VALUE_CLK_SYS);

	// initialize STDIO
	stdio_init_all();

	// addresses of hardware registers stored into TestData
	// (presumably read by the test routines, e.g. the SIO load/store tests)
	TestData[0] = (u32)&sio_hw->gpio_in;
	TestData[1] = (u32)&usb_hw->sie_status;
	TestData[2] = (u32)&sio_hw->gpio_togl;

	while (true)
	{
		// Test1 = routine measured with REPT repetitions, Test2 = routine
		// measured with 10 repetitions; last argument = ticks to subtract
		printf("\nInstruction timings on 125MHz\n");
		printf("=============================\n");
		printf("Move\n");
		Test1("MOVS Rd,#<imm>",		TestMove1, 0);
		Test1("MOVS Rd,Rm",		TestMove2, 0);
		Test1("MOV Rd,Rm",		TestMove3, 0);
		Test2("MOV PC,Rm",		TestMove4, 1);
		printf("Add\n");
		Test1("ADDS Rd,Rn,#<imm>",	TestAdd1, 0);
		Test1("ADDS Rd,Rn,Rm",		TestAdd2, 0);
		Test1("ADD Rd,Rm",		TestAdd3, 0);
		Test2("ADD PC,Rm",		TestAdd4, 1);
		Test1("ADDS Rd,#<imm>",		TestAdd5, 0);
		Test1("ADCS Rd,Rm",		TestAdd6, 0);
		Test1("ADD SP,#<imm>",		TestAdd7, 0);
		Test1("ADD Rd,SP,#<imm>",	TestAdd8, 0);
		Test1("ADR Rd,<label>",		TestAdd9, 0);
		printf("Subtract\n");
		Test1("SUBS Rd,Rn,Rm",		TestSub1, 0);
		Test1("SUBS Rd,Rn,#<imm>",	TestSub2, 0);
		Test1("SUBS Rd,#<imm>",		TestSub3, 0);
		Test1("SBCS Rd,Rm",		TestSub4, 0);
		Test1("SUB SP,#<imm>",		TestSub5, 0);
		Test1("NEGS Rd,Rn",		TestSub6, 0);
		printf("Multiply\n");
		Test1("MULS Rd,Rn",		TestMul, 0);
		printf("Compare\n");
		Test1("CMP Rn,Rm",		TestCmp1, 0);
		Test1("CMN Rn,Rm",		TestCmp2, 0);
		Test1("CMP Rn,#<imm>",		TestCmp3, 0);
		printf("Logical\n");
		Test1("ANDS Rd,Rm",		TestAnd, 0);
		Test1("EORS Rd,Rm",		TestEor, 0);
		Test1("ORRS Rd,Rm",		TestOrr, 0);
		Test1("BICS Rd,Rm",		TestBic, 0);
		Test1("MVNS Rd,Rm",		TestMvn, 0);
		Test1("TST Rn,Rm",		TestTst, 0);
		printf("Shift\n");
		Test1("LSLS Rd,Rm,#<shift>",	TestLsls1, 0);
		Test1("LSLS Rd,Rs",		TestLsls2, 0);
		Test1("LSRS Rd,Rm,#<shift>",	TestLsrs1, 0);
		Test1("LSRS Rd,Rs",		TestLsrs2, 0);
		Test1("ASRS Rd,Rm,#<shift>",	TestAsrs1, 0);
		Test1("ASRS Rd,Rs",		TestAsrs2, 0);
		printf("Rotate\n");
		Test1("RORS Rd,Rs",		TestRors, 0);
		printf("Load RAM\n");
		Test1("LDR Rd,[Rn,#<imm>]",	TestLdr1, 0);
		Test1("LDRH Rd,[Rn,#<imm>]",	TestLdr2, 0);
		Test1("LDRB Rd,[Rn,#<imm>]",	TestLdr3, 0);
		Test1("LDR Rd,[Rn,Rm]",		TestLdr4, 0);
		Test1("LDRH Rd,[Rn,Rm]",	TestLdr5, 0);
		Test1("LDRSH Rd,[Rn,Rm]",	TestLdr6, 0);
		Test1("LDRB Rd,[Rn,Rm]",	TestLdr7, 0);
		Test1("LDRSB Rd,[Rn,Rm]",	TestLdr8, 0);
		Test1("LDR Rd,<label>",		TestLdr9, 0);
		Test1("LDR Rd,[SP,#<imm>]",	TestLdr10, 0);
		Test1("LDM Rn!,{<list>}",	TestLdr11, 0);
		Test1("LDM Rn,{<list>}",	TestLdr12, 2);
		printf("Load SIO\n");
		Test1("LDR Rd,[Rn,#<imm>]",	TestLdr1B, 0);
		Test1("LDRH Rd,[Rn,#<imm>]",	TestLdr2B, 0);
		Test1("LDRB Rd,[Rn,#<imm>]",	TestLdr3B, 0);
		Test1("LDR Rd,[Rn,Rm]",		TestLdr4B, 0);
		Test1("LDRH Rd,[Rn,Rm]",	TestLdr5B, 0);
		Test1("LDRSH Rd,[Rn,Rm]",	TestLdr6B, 0);
		Test1("LDRB Rd,[Rn,Rm]",	TestLdr7B, 0);
		Test1("LDRSB Rd,[Rn,Rm]",	TestLdr8B, 0);
		printf("Store RAM\n");
		Test1("STR Rd,[Rn,#<imm>]",	TestStr1, 0);
		Test1("STRH Rd,[Rn,#<imm>]",	TestStr2, 0);
		Test1("STRB Rd,[Rn,#<imm>]",	TestStr3, 0);
		Test1("STR Rd,[Rn,Rm]",		TestStr4, 0);
		Test1("STRH Rd,[Rn,Rm]",	TestStr5, 0);
		Test1("STRB Rd,[Rn,Rm]",	TestStr6, 0);
		Test1("STR Rd,[SP,#<imm>]",	TestStr7, 0);
		Test1("STM Rn!,{<list>}",	TestStr8, 0);
		printf("Store SIO\n");
		Test1("STR Rd,[Rn,#<imm>]",	TestStr1B, 0);
		Test1("STRH Rd,[Rn,#<imm>]",	TestStr2B, 0);
		Test1("STRB Rd,[Rn,#<imm>]",	TestStr3B, 0);
		Test1("STR Rd,[Rn,Rm]",		TestStr4B, 0);
		Test1("STRH Rd,[Rn,Rm]",	TestStr5B, 0);
		Test1("STRB Rd,[Rn,Rm]",	TestStr6B, 0);
		printf("Push\n");
		Test1("PUSH {<list>}",		TestPush1, 0);
		Test1("PUSH {<list>,LR}",	TestPush2, 0);
		printf("Pop\n");
		Test1("POP {<list>}",		TestPop1, 0);
		Test2("POP {PC}",		TestPop2, 4);
		printf("Branch\n");
		Test1("B<cc> <label> not",	TestB1, 0);
		Test1("B<cc> <label> taken",	TestB1B, 0);
		Test1("B <label>",		TestB2, 0);
		Test1("BL <label>",		TestB3, 0);
		Test2("BX Rm",			TestB4, 2);
		Test2("BLX Rm",			TestB5, 2);
		printf("Extend\n");
		Test1("SXTH Rd,Rm",		TestExt1, 0);
		Test1("SXTB Rd,Rm",		TestExt2, 0);
		Test1("UXTH Rd,Rm",		TestExt3, 0);
		Test1("UXTB Rd,Rm",		TestExt4, 0);
		printf("Reverse\n");
		Test1("REV Rd,Rm",		TestRev1, 0);
		Test1("REV16 Rd,Rm",		TestRev2, 0);
		Test1("REVSH Rd,Rm",		TestRev3, 0);
		printf("State\n");
		Test1("CPSID i",		TestStat1, 0);
		Test1("CPSIE i",		TestStat2, 0);
		Test1("MRS Rd,<reg>",		TestStat3, 0);
		Test1("MSR <reg>,Rn",		TestStat4, 0);
		printf("Hint\n");
		Test1("SEV",			TestHint1, 0);
		Test1("WFE",			TestHint2, 1);
		Test1("YIELD",			TestHint3, 0);
		Test1("NOP",			TestHint4, 0);
		printf("Barriers\n");
		Test1("ISB",			TestBar1, 0);
		Test1("DMB",			TestBar2, 0);
		Test1("DSB",			TestBar3, 0);

		// wait 10 seconds before repeating the whole table
		sleep_ms(10000);
	}
}
