Random Tidbits – Page 2 – Alex Ionescu’s Blog

May 8, 2007

New Object Manager Filtering APIs

The new bits of the WDK have been released, and it seems that finally, we are starting to see a glimpse of some of the new filtering technologies that were promised in Vista SP1 to help with incompatibilities due to PatchGuard. Although Vista added powerful Registry filtering support on top of the existing File filtering architecture, hooking some native calls would still have been necessary in order to filter other kinds of system behaviour. The first which seems to have been addressed are handles, and Vista SP1 now supports Object Manager Filters.

Currently, only create and duplicate can be filtered, but the ability for both pre and post notifications exists. As with the new filter model, Object Manager Filters also support Altitudes, and are fully versioned. Unfortunately, this new set of APIs seems rather disappointing to me. For starters, this functionality was already available, even behind PatchGuard’s back, through native Object Manager callbacks present in the OBJECT_TYPE’s ObjectTypeInitializer structure which contains all the callbacks for the object type. This interface seems to do nothing more than expose in a more public and accessible way the same ObCreateMethod interface that has existed since NT4, except that it only works for create and duplicate (while the internal interface allows for open and inherit as well).

Nevertheless, this new filtering mechanism is clearly written to be extensible for other Object Manager actions, so hopefully we’ll see some new improvements before SP1 actually ships. For the curious, here are some of the new toys to play with:

//
// Registration version for Vista SP1 and Windows Server 2007
//
#define OB_FLT_REGISTRATION_VERSION_0100  0x0100

//
// This value should be used by filters for registration
//
#define OB_FLT_REGISTRATION_VERSION OB_FLT_REGISTRATION_VERSION_0100

//
// Handle operations that can be filtered. Only create and duplicate are
// defined in this version of the interface.
//
typedef ULONG OB_OPERATION;

#define OB_OPERATION_HANDLE_CREATE              0x00000001
#define OB_OPERATION_HANDLE_DUPLICATE           0x00000002

typedef struct _OB_PRE_CREATE_HANDLE_INFORMATION {
Â Â Â __inout ACCESS_MASKÂ Â Â Â Â Â Â Â DesiredAccess;
Â Â Â __in ACCESS_MASKÂ Â Â Â Â Â Â Â Â Â Â OriginalDesiredAccess;
} OB_PRE_CREATE_HANDLE_INFORMATION, *POB_PRE_CREATE_HANDLE_INFORMATION;

typedef struct _OB_PRE_DUPLICATE_HANDLE_INFORMATION {
Â Â Â __inout ACCESS_MASKÂ Â Â Â Â Â Â Â DesiredAccess;
Â Â Â __in ACCESS_MASKÂ Â Â Â Â Â Â Â Â Â Â OriginalDesiredAccess;
Â Â Â __in PVOIDÂ Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â SourceProcess;
Â Â Â __in PVOIDÂ Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â TargetProcess;
} OB_PRE_DUPLICATE_HANDLE_INFORMATION, * POB_PRE_DUPLICATE_HANDLE_INFORMATION;

typedef union _OB_PRE_OPERATION_PARAMETERS {
Â Â Â __inout OB_PRE_CREATE_HANDLE_INFORMATIONÂ Â Â Â Â Â Â CreateHandleInformation;
Â Â Â __inout OB_PRE_DUPLICATE_HANDLE_INFORMATIONÂ Â Â Â DuplicateHandleInformation;
} OB_PRE_OPERATION_PARAMETERS, *POB_PRE_OPERATION_PARAMETERS;

typedef struct _OB_PRE_OPERATION_INFORMATION {
Â Â Â __in OB_OPERATIONÂ Â Â Â Â Â Â Â Â Â Operation;
Â Â Â union {
Â Â Â Â Â Â Â __in ULONG Flags;
Â Â Â Â Â Â Â struct {
Â Â Â Â Â Â Â Â Â Â Â __in ULONG KernelHandle:1;
Â Â Â Â Â Â Â Â Â Â Â __in ULONG Reserved:31;
Â Â Â Â Â Â Â };
Â Â Â };
Â Â Â __in PVOIDÂ Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Object;
Â Â Â __in POBJECT_TYPEÂ Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â ObjectType;
Â Â Â __out PVOIDÂ Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â CallContext;
Â Â Â __in POB_PRE_OPERATION_PARAMETERSÂ Parameters;
} OB_PRE_OPERATION_INFORMATION, *POB_PRE_OPERATION_INFORMATION;

typedef struct _OB_POST_CREATE_HANDLE_INFORMATION {
Â Â Â __in ACCESS_MASKÂ Â Â Â Â Â Â Â Â Â Â GrantedAccess;
} OB_POST_CREATE_HANDLE_INFORMATION, *POB_POST_CREATE_HANDLE_INFORMATION;

typedef struct _OB_POST_DUPLICATE_HANDLE_INFORMATION {
Â Â Â __in ACCESS_MASKÂ Â Â Â Â Â Â Â Â Â Â GrantedAccess;
} OB_POST_DUPLICATE_HANDLE_INFORMATION, * POB_POST_DUPLICATE_HANDLE_INFORMATION;

typedef union _OB_POST_OPERATION_PARAMETERS {
Â Â Â __in OB_POST_CREATE_HANDLE_INFORMATIONÂ Â Â Â Â Â CreateHandleInformation;
Â Â Â __in OB_POST_DUPLICATE_HANDLE_INFORMATIONÂ Â Â DuplicateHandleInformation;
} OB_POST_OPERATION_PARAMETERS, *POB_POST_OPERATION_PARAMETERS;

typedef struct _OB_POST_OPERATION_INFORMATION {
Â Â Â __in OB_OPERATIONÂ Operation;
Â Â Â union {
Â Â Â Â Â Â Â __in ULONG Flags;
Â Â Â Â Â Â Â struct {
Â Â Â Â Â Â Â Â Â Â Â __in ULONG KernelHandle:1;
Â Â Â Â Â Â Â Â Â Â Â __in ULONG Reserved:31;
Â Â Â Â Â Â Â };
Â Â Â };
Â Â Â __in PVOIDÂ Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Object;
Â Â Â __in POBJECT_TYPEÂ Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â ObjectType;
Â Â Â __in PVOIDÂ Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â CallContext;
Â Â Â __in NTSTATUSÂ Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â ReturnStatus;
Â Â Â __in POB_POST_OPERATION_PARAMETERSÂ Parameters;
} OB_POST_OPERATION_INFORMATION,*POB_POST_OPERATION_INFORMATION;

//
// Return value of a pre-operation callback. OB_PREOP_SUCCESS is the only
// enumerator defined here.
//
typedef enum _OB_PREOP_CALLBACK_STATUS {
    OB_PREOP_SUCCESS
} OB_PREOP_CALLBACK_STATUS, *POB_PREOP_CALLBACK_STATUS;

//
// Pre-operation callback: receives the registration context and an __inout
// pointer to the operation information, and returns a
// OB_PREOP_CALLBACK_STATUS.
//
typedef OB_PREOP_CALLBACK_STATUS
(*POB_PRE_OPERATION_CALLBACK) (
    __in PVOID RegistrationContext,
    __inout POB_PRE_OPERATION_INFORMATION OperationInformation
    );

//
// Post-operation callback: receives the registration context and an __in
// pointer to the operation information; returns nothing.
//
typedef VOID
(*POB_POST_OPERATION_CALLBACK) (
    __in PVOID RegistrationContext,
    __in POB_POST_OPERATION_INFORMATION OperationInformation
    );

typedef struct _OB_OPERATION_REGISTRATION {
Â Â Â __in POBJECT_TYPEÂ Â Â Â Â Â Â Â Â Â Â Â Â Â Â *ObjectType;
Â Â Â __in OB_OPERATIONÂ Â Â Â Â Â Â Â Â Â Â Â Â Â Â Operations;
Â Â Â __in POB_PRE_OPERATION_CALLBACKÂ PreOperation;
Â Â Â __in POB_POST_OPERATION_CALLBACK PostOperation;
} OB_OPERATION_REGISTRATION, *POB_OPERATION_REGISTRATION;

typedef struct _OB_CALLBACK_REGISTRATION {
Â Â Â __in USHORTÂ Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Version;
Â Â Â __in USHORTÂ Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â OperationRegistrationCount;
Â Â Â __in UNICODE_STRINGÂ Â Â Â Â Â Â Â Â Â Â Â Altitude;
Â Â Â __in PVOIDÂ Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â RegistrationContext;
Â Â Â __in OB_OPERATION_REGISTRATIONÂ *OperationRegistration;
} OB_CALLBACK_REGISTRATION, *POB_CALLBACK_REGISTRATION;

#if (NTDDI_VERSION >= NTDDI_VISTASP1)
//
// Registers the callbacks described by CallbackRegistration and returns an
// opaque handle through RegistrationHandle.
//
NTKERNELAPI
NTSTATUS
ObRegisterCallbacks (
    __in POB_CALLBACK_REGISTRATION CallbackRegistration,
    __deref_out PVOID *RegistrationHandle
    );

//
// Takes a registration handle; by its name, undoes a prior
// ObRegisterCallbacks.
//
NTKERNELAPI
VOID
ObUnRegisterCallbacks (
    __in PVOID RegistrationHandle
    );

//
// Returns the filter interface version. Declared with (VOID) so this is a
// true prototype in C rather than an unspecified-argument declaration.
//
NTKERNELAPI
USHORT
ObGetFilterVersion (VOID);
#endif

March 16, 2007March 17, 2007

Interviewing at Apple, Google and Microsoft

As some of you may or may not know, I’ve spent the last few weeks interviewing with the big three, and drove myself crazy choosing which offer to select (to be fair, I just finished interviewing with MSFT today; the offer, if any, will only come on Monday, when I make my big choice).

For the benefit of everyone, I decided that once my decision is made, I’d post more information about the process at all the three companies. How were the recruiters, the interviewers, the kind of perks to expect (yes we all know Google has free food), and more. I hope it’ll be a good write-up and perhaps end up with a chart, much like the Google/Yahoo/Windows Live(MSFT) blog post ended up.

It’s been great interviewing with all three though, and if anyone from there is reading this, thank you for everything!

March 6, 2007March 6, 2007

Vista DRM Issue Aftermath

I received word from Microsoft today on the status of the Vista DRM Issue that I talked about earlier. It seems that the final consensus from their internal investigation is that my method does not constitute a viable means of exploiting the driver signing/DRM model. In other words, the theory I came up with that might allow PMP to be subverted seems to have been proven false.

My original idea was to boot Vista with the /DEBUG flag and then use the internal, undocumented Kernel-Mode Debug API to load executable code in kernel-memory or to overwrite existing code (as well as to disable PatchGuard). My rationale was that PMP wouldn’t detect any issues, since no unsigned code was running in the kernel, instead, you would have code hidden in Non Paged Pool or as part of \Driver\Null’s IOCTL routine (similarly to how Johanna loaded code using the pagefile.sys). However, it seems this won’t work, I’m assuming because PMP will actually detect that you’ve booted in Debug Mode, and it will enter reduced functionality mode (Which was the hypothesis on which the entire idea depended). Since I don’t know more about PMP, I’m not sure if this is what happens, but that’s my personal guess. Either way, it seems DRM is here to stay for now.

Speaking of reduced functionality mode, if you turn off the Secured Licensing Service (SLsvc) in Vista, the Control Panel and Windows Update stop working. I was disabling services to get a minimalistic Vista desktop (I don’t like booting with 50 processes on startup), and I didn’t care about this service, disabling it and assuming PMP would block me from playing BluRay/HDDVD (Which I don’t have)… but I never guessed it would kill the Control Panel. Seems kinda weird.

When I get back home, I’ll post a list of the only services that I’m running on Vista. It’s got all the functionality I need (Internet, Printing, Audio). I’m getting a new hard drive for my server tonight, as well as upgrading my main desktop CPU from an AMD64 X2 3800+ to an Opteron 185. That’s a jump from 2x2GHz, 1MB Cache to 2×2.6GHz, 2MB cache. I’m hoping to overclock to 2.8GHz. Do NOT get an FX-60. They’re the exact same chip, but they cost twice as much.

February 16, 2007

Rebooting from Kernel Mode

I see this question posted on OSR Online a lot: “How do I force a reboot of the computer from kernel mode?”. The clean solution always being recommended is to have a user-mode service that talks to the driver and does the appropriate ExitWindowsEx API call. But what if you really want to do it from kernel-mode? Well, you could use HalReturnToFirmware or NtShutdownSystem, but those functions are undocumented, and you probably won’t get WHQLed if you try using them. So I’ll show you a sneaky way that does the same, but uses a fully documented kernel API. Don’t use it unless you really know what you’re doing; I personally recommend using a service as well.

Rebooting a machine from kernel mode:

/* Per the author: runs the bugcheck callbacks, then reboots via HalReturnToFirmware(HalRebootMachine) - no BSOD, no dump */
KeBugCheck(POWER_FAILURE_SIMULATE);

Now, I know what you’re thinking, but you’re wrong. This will *not* bugcheck the machine. It will actually call HalReturnToFirmware(HalRebootMachine), right after processing bugcheck callbacks. No BSOD, no crash dump, just a clean, simple, immediate reboot.

Enjoy 😉

February 6, 2007

WordPress 2.1 Server Upgrade

I’ve just updated the server to WordPress 2.1, so please let me know if you see any weird errors or unexpected behavior!

January 30, 2007January 31, 2007

Recent Events

After initially being slashdotted, my blog post below got linked across the blogosphere, hit Digg, the Inquirer, BoingBoing and other major news sites, and I’ve reached some 60 000 visitors in less than 24 hours…

Since most of you are therefore new visitors, I just wanted to post a short introduction/information paragraph. First of all, I suggest you visit the About page of the blog, as well as my Wiki page on the ReactOS website. This is just to clear up any confusion on where I currently reside, age, education, etc. If you are interested in my other publications/works as a security researcher, you should visit the Publications page, as well as OpenRCE, where I usually post my latest articles. You can also find a recording of my REcon 2006 talk on Archive.Org. Search for my name; the PDF is available on the Publications page as well. Finally, my project, ReactOS, is having a donation fund; if you’d like to donate some money, that would be very appreciated.

As for the DRM post, I never expected that it would get the kind of attention it has; to be fair, I had completely forgotten that today was Vista’s launch date (being a beta tester, I’ve had RTM for months now); I certainly don’t want to make it seem like I was specifically targeting this day to release anything. Later this week I will release some safe, generic, proof of concept code that targets what I believe is a flaw in the Code Integrity/Driver Signing model. My 64-bit VM is running extremely slow, so it will take me some time to test the code. Because this code will require an initial reboot, Microsoft does not consider it to be a flaw from a security standpoint. And because it’s so generic, it has absolutely nothing to do with DRM or PMP. That being said, I’m sure someone with knowledge of the PMP implementation might be able to use this as a very smart building block of the entire code that would be required; but that would be like arresting every knife manufacturer because knives can kill people.

Finally, if any of you would like more information about ReactOS or would like to meet in person, I will be giving a talk at the SOCAL5X conference on February 9th, and I will be around LA on the 10th as well.

January 8, 2007

Solution to Challenge

The clock has ticked past midnight, so it’s now time to reveal the solution to my previous challenge. When I say “Solution” I mean what I and others are aware to be the currently best method. Nobody else has found anything better, and the two “winners” have presented the same solution (which Windows itself uses).

Since the question originally came to me from a developer at Microsoft, and I mentioned this, it was safe to assume that the method Windows used was probably “the right answer”. However, the hard part was explaining what exactly it was doing.

Correct solutions came, in order, from Matt Miller, Razvan Hobeanu and Ken Johnson. These are some of my favorite blogs to read and people I respect most, so I was honoured that they took the time to write up a solution (thanks to everyone else as well!). I will present a “full” solution, including the 64-bit implementation, and the actual code in the kernel responsible for this hack.

Before I start however, there’s one esoteric solution from Myria which I thought was funny enough to be shared. She proposed, roughly: 1) SetThreadAffinityMask(GetCurrentThread(), 1); 2) return 0;

This cute answer will first force the thread to run on CPU 0, then return… CPU 0. Technically this is true, but it’s also completely useless for the actual purpose on why you’d want to know the CPU number in the first place.

Which brings us to the actual correct solution. Most people correctly identified the routine responsible for the code, RtlGetCurrentProcessorNumber, which is what kernel32’s GetCurrentProcessorNumber forwards to. Note that the WOW64 version actually forwards to NtGetCurrentProcessorNumber, and that this Native API also does exist on 32-bit versions of Windows, and reads the value stored in the PCR. While this is a simple solution, it involves an expensive system call. So let’s go back to the user-mode Rtl routine. The raw assembly code is as follows:

mov ecx, 03Bh       ; ecx = selector 0x3B (0x38 | 3)
lsl eax, ecx        ; eax = segment limit of the selector in ecx
shr eax, 0Eh        ; shift right by 14, dropping the low 14 bits of the limit
retn                ; result is returned in eax

When I first saw this code, I didn’t even know what the LSL instruction did, as I had never encountered it. The Intel Manual explains that LSL stands for “Load Segment Limit”, which is a nice way to get the limit for a selector in the GDT without actually having access to the GDT itself. 0x3B is a rather weird selector, but I recognized it as 0x38 ORed with 0x3. The former is the selector for the TEB, and the latter is called the RPL Mask, and selects the proper ring level (User-Mode is Ring 3, so RPL is 3). Converting this to nice C code using MSVC 2005’s intrinsics and the NDK (which has internal definitions), this function looks something like:

ULONG
RtlGetCurrentProcessorNumber(VOID)
{
    //
    // Query the limit of the user-mode TEB segment (TEB selector combined
    // with RPL_MASK) and discard its low 14 bits. The remaining upper bits
    // of the limit are what the kernel uses to carry the CPU number.
    //
    return (__segmentlimit(KGDT_R3_TEB | RPL_MASK) >> 14);
}

This explains what the code does, and in some sense, how it does it. However, what exactly is the CPU number doing there? Is this some sort of x86 feature? Is it added during each context switch, at boot-up, etc?

The answer lies in the KeStartAllProcessors routine in the kernel, where the following piece of assembly executes:

mov     ebx, [ebp-2Ch]      ; ebx = GDT pointer from the processor state being set up
mov     eax, [ebp-328h]     ; eax = i, the CPU number in the loop
shl     eax, 0Eh            ; eax = i << 14
mov     [ebx+38h], ax       ; low 16 bits -> first word of GDT entry 0x38 (KGDT_R3_TEB)
mov     eax, [ebp-328h]     ; reload i
shl     eax, 0Eh            ; eax = i << 14 again
xor     eax, [ebx+3Ch]      ; xor/and/xor: replace only bits 16-19 of the entry's
and     eax, 0F0000h        ; second dword with bits 16-19 of (i << 14), leaving
xor     [ebx+3Ch], eax      ; every other bit of that dword untouched

With some help from IDA, we can make this a bit nicer and update some lines:

INIT:008F6605                 mov     ebx, [ebp+ProcessorState.SpecialRegisters.Gdtr.HighWord]
INIT:008F66D6                 mov     eax, [ebp+i]

And of course, [ebx+38h] is the KGDT_R3_TEB entry in the GDT. Because this routine initializes all processors, it loops them, and i contains the current CPU number in the loop. The processor state contains the pointer to the actual GDT for this processor. Therefore, this is a specific hack that was added, and is fully dependent on the OS, which has to be Windows 2003 or newer.

Finally, on x64 versions, the selector used is actually 0x53, based on the 0x50 TEB selector in 64-bit mode. In WOW64 however, a fake WOW system call to NtGetCurrentProcessorNumber is done instead.

Full credit for this hack and the code behind it should go to Neill Clift, who came up with it.

January 5, 2007January 8, 2007

Challenge of the Week (Month?)

Here’s a nice challenge question I got from a very ingenious developer working at Microsoft… now that I’ve found the solution, I thought I should ask it out in the open.

Correct, complete and full answer gets you a nice prize [ie: your name and solution published ;)].

Find the fastest (total cycles) and smallest (total size) method of obtaining the current CPU number that current thread is executing on, on a Windows 2003 or higher computer (ie: this solution can take advantage of any API or system improvements added to NT 5.2+).

You may use an API call if you wish, but be aware that the actual call and stack operations will count in your total.
You may duplicate the contents of an API call, but be aware that you must explain what your code does in detail. Inlining an API you understand nothing about is not a complete solution.
Code must work from user-mode. You can write a kernel driver or use a native function, but the total cycles spent on the ring transition will be factored in your total, plus any size of code spent in kernel-mode.

Email solutions to aionescu at gmail dot com. Post questions in the comments if you have any.

January 4, 2007January 4, 2007

Heap Tagging is Broken

While developing the Native Development Library (NDL) that I’m working on, I attempted to play with a very undocumented feature of the Rtl Heap APIs: Tagging.

If you’ve used the familiar ExAllocatePool APIs in kernel-mode, then you’re already familiar with tagging. The Heap Manager supports the same idea, but allows you to define your own string tags of arbitrary size. This is done by a rather complex set of global flags, special APIs with strange string formatting (RtlCreateTagHeap), and a hidden little macro in winnt.h. Here’s how heap tagging works in the NDL:

A function called NdlpAllocateMemoryInternal allows the caller (the NDL) to allocate memory from the NDL Heap with a specific size, flags, and tag. The tag here is an index that we can define ourselves, such as NDL_STRING_TAG which is 0x2. Then, the NDL has other internal and/or external functions which allocate memory. For example, the LPC routines need to allocate PORT_MESSAGEs or other structures, so NDL_COMMUNICATIONS_TAG is used when calling NdlpAllocateMemoryInternal. There is also NdlpAllocateString, which uses NDL_STRING_TAG. Finally, users of the NDL (your application itself) gets an API called NdlAllocateMemory. You only provide the size and flags, and internally the NDL will set the NDL_USER_TAG to your allocation.

So far so good.

Now there’s two cool things we can do. First, the RtlQueryTagHeap API allows you to obtain statistics on each tag. Allocations, frees, and bytes allocated. This can give you a nice memory map of the NDL’s current memory usage. Even better however, by using RtlWalkHeap, the NDL can scan for all active NDL_USER_TAG allocations. This is useful, since when your native application returns, an internal call to NdlUnregisterApplication is made. When this happens, the assumption is made that your code is done executing (unless you’ve registered as a “resident” application), so in order to promote good programming and to catch leaks, RtlWalkHeap is called, and all active heap entries are scanned. If a block with the NDL_USER_TAG tag index is found, a debug message is printed out, saying that a heap entry at 0xFOO of size 0xBAR is leaking. We can then use the User-Mode Stack Trace Database support and the AllocatorBackTraceIndex of the heap entry to give a complete stack trace on where this allocation was made.

So far so good. Or Not.

Turns out I was getting Tag Indices such as 0x8007, 0x8004, etc. It seems that all heap allocations were instead indexed with 0x8000 | CurrentAllocationIndex. This wasn’t helpful at all, so I started analyzing the problem.

The first one is the way in which heap tags are generated and then saved. To generate a tag, you use the MAKE_HEAP_TAG macro in winnt.h. This macro takes a “Tag base”, which is what RtlCreateTagHeap returns to you, as well as a tag index, which you define yourself, for example 0x2. The operation that’s done is Base | (Index << 18). So for index 2, with a base of 0x40000, this gives us 0xC0000. The problem is that when RtlpUpdateTagEntry is done, the code does the following: “shr ebx, 12h” followed by “and ebx, 0FFFF0FFFh”. EBX contains the heap flags, which are the actual HEAP_XXX flags ORed with the tag. Suppose we didn't use any flags, and are just sending our heap tag, 0xC0000. The result of this operation will be 3, not 2, because nothing is done to take into account the heap tag base. However, this bug should cause us to get tag indices that are off-by-one, not in the 0x8000 range. So more must be going on. Recall that ebx also contains the typical heap flags. Some heap flags are as small as 0x8, others are bigger such as 0x100, and others yet are as high as 0x40000000. You can start seeing how this can corrupt this check. To make matters worse, when using a stack trace database, the heap understands that it's working in "debugging mode", so it calls a different set of APIs, such as RtlAllocateHeapSlowly and RtlDebugAllocateHeap. The latter ORs in some flags by default, such as Heap->ForceFlags, as well as HEAP_DISABLE_VALIDATION_CHECKS and HEAP_USER_SETTABLE_FLAGS. In my case, the total mask of the flags being ORed in was 0x50100000. Let’s bring in our heap tag, and the total becomes 0x501C0000. Let’s do the broken EBX code again, and the tag index becomes 0x407. Now RtlpUpdateTagEntry will check if 0x407 is above Heap->HighestTagIndex, and since I’ve created a lot fewer than 1031 tags, it will think this is a “pseudo-tag”. A pseudo-tag is the combination of HEAP_PSEUDO_TAG_MASK and the current allocation index…and you’ve guessed it, that mask is 0x8000.

Thankfully, I was able to find a workaround for the NDL, although not without a small (but not critical) loss of functionality. First, I disabled support for stack backtraces. It makes finding your leak a bit harder, but it’s not the end of the world, since this functionality is provided as a small benefit anyway. Since the stack trace functions are exported by Rtl, I will simply modify NdlAllocateMemory to capture the trace by itself. I can then use RtlSetUserFlagsHeap to associate the backtrace index or another similar device. If I want to get more evil, I can probably also play with the _HEAP_ENTRY structure itself and set the backtrace index myself.

The second “fix” was not to use the MAKE_HEAP_TAG macro at all, and ignore the “Tag base”. This solves the off-by-one problem but won’t work very reliably because it can conflict with actual heap flags.

This problem is on Win 2000 and XP. I haven’t checked Windows 2003 or Vista yet, but it’s possible that Vista fixed it after Adrian’s rewrite of code for higher security.

December 29, 2006December 29, 2006

DR (Debug Register) Safety/Reliability and Accounting Features in Windows 2003

As some of you may know, Windows 2000 and even XP suffered from multiple validation/sanitation lacks in DR handling during Context<->Trap Frame conversion. The former is the CONTEXT structure used by Win32, and the latter refers to the KTRAP_FRAME structure used in NT. Many APIs such as Set/GetThreadContext, NtContinue, VDM Stuff, User-mode APCs and User-mode Exception Handling as well as Win32k User-mode Callbacks will eventually convert from one form of the structure to the other. These structures contain the entire CPU state (the KTRAP_FRAME doesn’t contain FPU/NPX Stuff, this is saved on the thread’s kernel stack instead), such as segments, registers and eflags.

You can imagine that a really poorly written kernel would allow you to do something like this in user-mode:

Context.SegCs = KGDT_R0_CODE;
NtSetThreadContext(Thread, &Context);

and this would save the Ring 0 CS Selector into the KTRAP_FRAME, which is used when returning back to user-mode, thus giving you Ring 0 access.

Of course, DaveC wasn’t that stupid.

The NT Kernel heavily validates (or “sanitizes”) EFLAGS and the fs, ds, es, cs selectors, as well as ensures DR6 and DR7 are valid. However, older versions of Windows did not fully ensure the safety of these registers. In case you didn’t know, the DRs, or Debug Registers, are a series of 32-bit registers on the x86 CPU provided for hardware breakpoints and other debugger support. DR0, 1, 2 and 3 are used to hold the addresses of the hardware breakpoints, while DR6 is a status register, and DR7 is a control register.

Already, you can guess that you really don’t want user-mode to give you kernel-mode pointers in DR0-3. The kernel would be blissfully unaware that you’ve just set breakpoints in kernel space, and crash when those pointers were hit. Windows 2000 does validate for this.

However, consider the scenario where the caller sets proper user-mode addresses. The kernel will allow this, and when those pointers are hit, the CPU will do a breakpoint, killing the process if no debugger is attached. Again, I insist that the CPU is entirely responsible for the exception. It has no knowledge of address spaces. This implies that these breakpoint addresses are global for the entire system. Windows 2000 allowed a lower-privilege application to set a debug register on a specific address that would be hit in a remote process, and then crash that application. Careful crafting would allow the crash to be predictable, and exploitable, such as this advisory demonstrates.

This has long been fixed, and the entire way in which DR registers are handled has also been re-written to protect against some flaws that could happen under VDM or V8086 mode. The DISPATCHER_HEADER has a member called DebugActive, and it’s used for KTHREAD objects. This 1-byte value is actually a mask which represents which DR registers are valid for this thread. The masks are generated as follows:

// Thread Dispatcher Header DebugActive Mask

/* Bit for debug register number x. Fully parenthesized so that uses such as
 * DR_MASK(n) + 1 or DR_MASK(a | b) are not regrouped by operator precedence. */
#define DR_MASK(x) (1 << (x))

/* Bit 4: there is no DR4, so this bit flags that debugging is active on the thread */
#define DR_ACTIVE_MASK 0x10

/* Bits 0-3 and 6, i.e. DR_MASK(0) | DR_MASK(1) | DR_MASK(2) | DR_MASK(3) | DR_MASK(6) */
#define DR_REG_MASK 0x4F

Notice, since there is no DR4 register, the 0x10 flag is actually used to specify whether debugging is actually active on the thread. Now if we take a look at KeContextToKframes, which converts a CONTEXT to a KTRAP_FRAME, the code is similar to this:

Â Â Â /* Handle the Debug Registers */

Â Â Â if ((ContextFlags & CONTEXT_DEBUG_REGISTERS) == CONTEXT_DEBUG_REGISTERS)

Â Â Â {

Â Â Â Â Â Â Â /* Loop DR registers */

Â Â Â Â Â Â Â for (i = 0; i < 4; i++)

Â Â Â Â Â Â Â {

Â Â Â Â Â Â Â Â Â Â Â /* Sanitize the context DR Address */

Â Â Â Â Â Â Â Â Â Â Â SafeDr = Ke386SanitizeDr(KiDrFromContext(i, Context), PreviousMode);

Â Â Â Â Â Â Â Â Â Â Â /* Save it in the trap frame */

Â Â Â Â Â Â Â Â Â Â Â *KiDrFromTrapFrame(i, TrapFrame) = SafeDr;

Â Â Â Â Â Â Â Â Â Â Â /* Check if this DR address is active and add it in the DR mask */

Â Â Â Â Â Â Â Â Â Â Â if (SafeDr) DrMask |= DR_MASK(i);

Â Â Â Â Â Â Â }

Â Â Â Â Â Â Â /* Now save and sanitize DR6 */

Â Â Â Â Â Â Â TrapFrame->Dr6 = Context->Dr6 & DR6_LEGAL;

Â Â Â Â Â Â Â if (TrapFrame->Dr6) DrMask |= DR_MASK(6);

Â Â Â Â Â Â Â /* Save and sanitize DR7 */

Â Â Â Â Â Â Â TrapFrame->Dr7 = Context->Dr7 & DR7_LEGAL;

Â Â Â Â Â Â Â KiRecordDr7(&TrapFrame->Dr7, &DrMask);

Â Â Â Â Â Â Â /* If we’re in user-mode */

Â Â Â Â Â Â Â if (PreviousMode != KernelMode)

Â Â Â Â Â Â Â {

Â Â Â Â Â Â Â Â Â Â Â /* Save the mask */

Â Â Â Â Â Â Â Â Â Â Â KeGetCurrentThread()->DispatcherHeader.DebugActive = DrMask;

Â Â Â Â Â Â Â }

Â Â Â }

Likewise, the converse function, KeContextFromKframes, uses the following blob:

Â Â Â /* Handle debug registers */

Â Â Â if ((Context->ContextFlags & CONTEXT_DEBUG_REGISTERS) ==

Â Â Â Â Â Â Â CONTEXT_DEBUG_REGISTERS)

Â Â Â {

Â Â Â Â Â Â Â /* Make sure DR7 is valid */

Â Â Â Â Â Â Â if (TrapFrame->Dr7 & ~DR7_RESERVED_MASK)

Â Â Â Â Â Â Â {

Â Â Â Â Â Â Â Â Â Â Â /* Copy the debug registers */

Â Â Â Â Â Â Â Â Â Â Â Context->Dr0 = TrapFrame->Dr0;

Â Â Â Â Â Â Â Â Â Â Â Context->Dr1 = TrapFrame->Dr1;

Â Â Â Â Â Â Â Â Â Â Â Context->Dr2 = TrapFrame->Dr2;

Â Â Â Â Â Â Â Â Â Â Â Context->Dr3 = TrapFrame->Dr3;

Â Â Â Â Â Â Â Â Â Â Â Context->Dr6 = TrapFrame->Dr6;

Â Â Â Â Â Â Â Â Â Â Â /* Update DR7 */

Â Â Â Â Â Â Â Â Â Â Â Context->Dr7 = KiUpdateDr7(TrapFrame->Dr7);

Â Â Â Â Â Â Â }

Â Â Â Â Â Â Â else

Â Â Â Â Â Â Â {

Â Â Â Â Â Â Â Â Â Â Â /* Otherwise clear DR registers */

Â Â Â Â Â Â Â Â Â Â Â Context->Dr0 =

Â Â Â Â Â Â Â Â Â Â Â Context->Dr1 =

Â Â Â Â Â Â Â Â Â Â Â Context->Dr3 =

Â Â Â Â Â Â Â Â Â Â Â Context->Dr6 =

Â Â Â Â Â Â Â Â Â Â Â Context->Dr7 = 0;

Â Â Â Â Â Â Â }

Â Â Â }

This new code ensures not only that DR7 and DR6 are valid, but also clears the DR registers if DR7 is invalid, as well as creates a specific per-thread mask specifying which DR registers are enabled and which are not, which protects from the random activation or use of DR addresses. Also, DR7 specifies which of the DRx registers are actually in use, so this information also needs to be kept into account. The KiUpdate/RecordDr7 routines are shown below:

/*
 * Returns the DR7 value to report for the current thread.
 *
 * If the thread's DebugActive mask has DR_ACTIVE_MASK set, the stored DR7 is
 * expected to hold only the override value (asserted), and 0 is returned
 * instead of it. Otherwise Dr7 is returned unchanged.
 */
ULONG
FASTCALL
KiUpdateDr7(IN ULONG Dr7)
{
    ULONG DebugMask = KeGetCurrentThread()->DispatcherHeader.DebugActive;

    /* Check if debugging is enabled */
    if (DebugMask & DR_ACTIVE_MASK)
    {
        /* Sanity checks */
        ASSERT((DebugMask & DR_REG_MASK) != 0);
        ASSERT((Dr7 & ~DR7_RESERVED_MASK) == DR7_OVERRIDE_MASK);
        return 0;
    }

    /* Return DR7 itself */
    return Dr7;
}

/*
 * Folds the DR7 value at *Dr7Ptr into a DebugActive-style register mask.
 *
 * Dr7Ptr - DR7 value to inspect; may have DR7_OVERRIDE_MASK ORed into it.
 * DrMask - Optional. If non-NULL, the mask is read from and written back to
 *          *DrMask. If NULL, the current thread's DebugActive is used and
 *          updated in place when it changes.
 *
 * Returns FALSE when DR7 is empty; otherwise TRUE if the mask was non-zero.
 *
 * Fixes relative to the original listing:
 *  - The "DR7 is empty" test was inverted. The other register bits in the
 *    mask are set when the register is non-zero, and KiUpdateDr7 asserts that
 *    an active thread's DR7 equals the override value; both require this
 *    branch to run when DR7 is zero.
 *  - The DR7 override was stored through DrMask (possibly NULL, and then
 *    overwritten by the final mask write) instead of through Dr7Ptr.
 *  - The thread-mask update was an expression with no assignment.
 * NOTE(review): Result stays FALSE even when the override is applied; that
 * matches the original listing and is left unchanged - confirm the intent.
 */
BOOLEAN
FASTCALL
KiRecordDr7(OUT PULONG Dr7Ptr,
            OUT PULONG DrMask)
{
    ULONG NewMask, Mask;
    UCHAR Result;

    /* Check if the caller gave us a mask */
    if (!DrMask)
    {
        /* He didn't, use the one from the thread */
        Mask = KeGetCurrentThread()->DispatcherHeader.DebugActive;
    }
    else
    {
        /* He did, read it */
        Mask = *DrMask;
    }

    /* Sanity check */
    ASSERT((*Dr7Ptr & DR7_RESERVED_MASK) == 0);

    /* Check if DR7 is empty */
    NewMask = Mask;
    if (!(*Dr7Ptr))
    {
        /* Assume failure */
        Result = FALSE;

        /* DR7 is zero, so drop its bit (0x80) from the mask */
        NewMask &= 0x7F;
        if (NewMask & DR_REG_MASK)
        {
            /* Other debug registers are in use: set the active mask */
            NewMask |= DR_ACTIVE_MASK;

            /* Set DR7 override */
            *Dr7Ptr |= DR7_OVERRIDE_MASK;
        }
        else
        {
            /* Sanity check */
            ASSERT(NewMask == 0);
        }
    }
    else
    {
        /* Check if we have a mask or not */
        Result = NewMask ? TRUE : FALSE;

        /* DR7 is non-zero: clear the active bit and record DR7's bit */
        NewMask &= ~DR_ACTIVE_MASK;
        NewMask |= 0x80;
    }

    /* Check if caller wants the new mask */
    if (DrMask)
    {
        /* Update it */
        *DrMask = NewMask;
    }
    else
    {
        /* Check if the mask changed and update the thread directly */
        if (Mask != NewMask)
        {
            KeGetCurrentThread()->DispatcherHeader.DebugActive = (UCHAR)NewMask;
        }
    }

    /* Return the result */
    return Result;
}

The code above is from ReactOS and may contain bugs :). Some Macros/defines are missing but the overall point should be clear. Next time you’re debugging a thread and come across DebugActive having a value that you expected was TRUE or FALSE, hopefully this should give you some insight.

On another note, I have started working on the NDK article and hope to finish it by tomorrow.