Add two HyperLogLog functions

New functions initHyperLogLogError() and freeHyperLogLog() simplify
using this module from elsewhere.

Author: Tomáš Vondra
Review: Peter Geoghegan
This commit is contained in:
Alvaro Herrera 2016-01-19 17:40:15 -03:00
parent 9ff60273e3
commit 948c97958b
2 changed files with 49 additions and 1 deletions

View File

@ -56,7 +56,7 @@
static inline uint8 rho(uint32 x, uint8 b);
/*
* Initialize HyperLogLog track state
* Initialize HyperLogLog track state, by bit width
*
* bwidth is bit width (so register size will be 2 to the power of bwidth).
* Must be between 4 and 16 inclusive.
@ -107,6 +107,52 @@ initHyperLogLog(hyperLogLogState *cState, uint8 bwidth)
cState->alphaMM = alpha * cState->nRegisters * cState->nRegisters;
}
/*
* Initialize HyperLogLog track state, by error rate
*
* Instead of specifying bwidth (number of bits used for addressing the
* register), this method allows sizing the counter for particular error
* rate using a simple formula from the paper:
*
* e = 1.04 / sqrt(m)
*
* where 'm' is the number of registers, i.e. (2^bwidth). The method
* finds the lowest bwidth with 'e' below the requested error rate, and
* then uses it to initialize the counter.
*
* As bwidth has to be between 4 and 16, the worst possible error rate
* is between ~25% (bwidth=4) and 0.4% (bwidth=16).
*/
void
initHyperLogLogError(hyperLogLogState *cState, double error)
{
uint8 bwidth = 4;
while (bwidth < 16)
{
double m = (Size) 1 << bwidth;
if (1.04 / sqrt(m) < error)
break;
bwidth++;
}
initHyperLogLog(cState, bwidth);
}
/*
* Free HyperLogLog track state
*
* Releases allocated resources, but not the state itself (in case it's not
* allocated by palloc).
*/
void
freeHyperLogLog(hyperLogLogState *cState)
{
Assert(cState->hashesArr != NULL);
pfree(cState->hashesArr);
}
/*
* Adds element to the estimator, from caller-supplied hash.
*

View File

@ -60,8 +60,10 @@ typedef struct hyperLogLogState
} hyperLogLogState;
extern void initHyperLogLog(hyperLogLogState *cState, uint8 bwidth);
extern void initHyperLogLogError(hyperLogLogState *cState, double error);
extern void addHyperLogLog(hyperLogLogState *cState, uint32 hash);
extern double estimateHyperLogLog(hyperLogLogState *cState);
extern void mergeHyperLogLog(hyperLogLogState *cState, const hyperLogLogState *oState);
extern void freeHyperLogLog(hyperLogLogState *cState);
#endif /* HYPERLOGLOG_H */