Cross-platform aligned allocators, popcount, prefetch.
More...
#include <stdint.h>
#include <stddef.h>
#include <stdlib.h>
Go to the source code of this file.
Cross-platform aligned allocators, popcount, prefetch.
Provides wrappers for:
- posix_memalign or _aligned_malloc/free
- cache prefetch instructions
- optimized 64-bit popcount and block-level popcount
- Author
- lambdaphoenix
- Version
- 0.1.1
- Copyright
- Copyright (c) 2025 lambdaphoenix
◆ cbits_free_aligned()
static void cbits_free_aligned |
( |
void * |
ptr | ) |
|
|
inline static |
Free aligned memory.
On MSVC, uses _aligned_free; otherwise, uses the standard free.
- Parameters
-
ptr | Pointer returned by cbits_malloc_aligned. |
◆ cbits_malloc_aligned()
static void * cbits_malloc_aligned |
( |
size_t |
size, |
|
|
size_t |
align |
|
) |
| |
|
inline static |
Allocate aligned memory.
Uses posix_memalign on POSIX, or _aligned_malloc on MSVC.
- Parameters
-
size | Number of bytes to allocate. |
align | Desired alignment in bytes (must be power of two). |
- Returns
- Pointer to aligned memory, or NULL if allocation failed.
◆ cbits_popcount64()
static uint64_t cbits_popcount64 |
( |
uint64_t |
x | ) |
|
|
inline static |
Count bits set in a 64-bit word.
- Parameters
-
x | The 64-bit word to count bits in. |
- Returns
- Number of set bits in x.
◆ cbits_popcount_block()
static uint64_t cbits_popcount_block |
( |
const uint64_t * |
ptr | ) |
|
|
inline static |
Inline wrapper that calls the current dispatch pointer.
- Parameters
-
ptr | Pointer to 8 contiguous uint64_t words. |
- Returns
- Total popcount as computed by the best available implementation.
◆ cbits_popcount_block_fallback()
uint64_t cbits_popcount_block_fallback |
( |
const uint64_t * |
ptr | ) |
|
Fallback popcount block implementation.
Processes in 64-bit chunks, summing up cbits_popcount64 for each of 8 words. Used when no vector instructions are available.
- Parameters
-
ptr | Pointer to at least 8 uint64_t words. |
- Returns
- Total popcount of the 8 words.
◆ cbits_prefetch()
static void cbits_prefetch |
( |
const void * |
ptr | ) |
|
|
inline static |
Prefetch a cache line at ptr into L1.
- Parameters
-
ptr | Address whose cache line should be prefetched. |
◆ init_cpu_dispatch()
void init_cpu_dispatch |
( |
void |
| ) |
|
Constructor to initialize popcount dispatch pointer.
At program start, this function checks CPU support for AVX-512 VPOPCNTDQ and AVX2 via __builtin_cpu_supports, then updates cbits_popcount_block_ptr accordingly.
- Parameters
-
◆ cbits_popcount_block_ptr
uint64_t(* cbits_popcount_block_ptr) (const uint64_t *ptr) |
( |
const uint64_t * |
ptr | ) |
|
|
extern |