Add bit_count SQL function

This function for bit and bytea counts the set bits in the bit or byte
string.  Internally, we use the existing popcount functionality.

For the name, after some discussion, we settled on bit_count, which
also exists with this meaning in MySQL, Java, and Python.

Author: David Fetter <david@fetter.org>
Discussion: https://www.postgresql.org/message-id/flat/20201230105535.GJ13234@fetter.org
This commit is contained in:
Peter Eisentraut 2021-03-23 08:45:51 +01:00
parent 5aed6a1fc2
commit a6715af1e7
9 changed files with 97 additions and 1 deletions

View File

@ -4010,6 +4010,28 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
</thead>
<tbody>
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm>
<primary>bit_count</primary>
</indexterm>
<indexterm>
<primary>popcount</primary>
<see>bit_count</see>
</indexterm>
<function>bit_count</function> ( <parameter>bytes</parameter> <type>bytea</type> )
<returnvalue>bigint</returnvalue>
</para>
<para>
Returns the number of bits set in the binary string (also known as
<quote>popcount</quote>).
</para>
<para>
<literal>bit_count('\x1234567890'::bytea)</literal>
<returnvalue>15</returnvalue>
</para></entry>
</row>
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm>
@ -4714,6 +4736,24 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
</thead>
<tbody>
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm>
<primary>bit_count</primary>
</indexterm>
<function>bit_count</function> ( <type>bit</type> )
<returnvalue>bigint</returnvalue>
</para>
<para>
Returns the number of bits set in the bit string (also known as
<quote>popcount</quote>).
</para>
<para>
<literal>bit_count(B'10111')</literal>
<returnvalue>4</returnvalue>
</para></entry>
</row>
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm>

View File

@ -36,6 +36,7 @@
#include "libpq/pqformat.h"
#include "nodes/nodeFuncs.h"
#include "nodes/supportnodes.h"
#include "port/pg_bitutils.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/varbit.h"
@ -1201,6 +1202,19 @@ bit_overlay(VarBit *t1, VarBit *t2, int sp, int sl)
return result;
}
/*
* bit_count
*
* Returns the number of bits set in a bit string.
*/
Datum
bit_bit_count(PG_FUNCTION_ARGS)
{
VarBit *arg = PG_GETARG_VARBIT_P(0);
PG_RETURN_INT64(pg_popcount((char *) VARBITS(arg), VARBITBYTES(arg)));
}
/*
* bitlength, bitoctetlength
* Return the length of a bit string

View File

@ -3440,6 +3440,17 @@ bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
return result;
}
/*
* bit_count
*/
Datum
bytea_bit_count(PG_FUNCTION_ARGS)
{
bytea *t1 = PG_GETARG_BYTEA_PP(0);
PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1)));
}
/*
* byteapos -
* Return the position of the specified substring.

View File

@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 202103231
#define CATALOG_VERSION_NO 202103232
#endif

View File

@ -1446,6 +1446,9 @@
{ oid => '752', descr => 'substitute portion of string',
proname => 'overlay', prorettype => 'bytea',
proargtypes => 'bytea bytea int4', prosrc => 'byteaoverlay_no_len' },
{ oid => '8436', descr => 'number of set bits',
  proname => 'bit_count', prorettype => 'int8', proargtypes => 'bytea',
  prosrc => 'bytea_bit_count' },
{ oid => '725',
proname => 'dist_pl', prorettype => 'float8', proargtypes => 'point line',
@ -3876,6 +3879,9 @@
{ oid => '3033', descr => 'set bit',
proname => 'set_bit', prorettype => 'bit', proargtypes => 'bit int4 int4',
prosrc => 'bitsetbit' },
{ oid => '8435', descr => 'number of set bits',
  proname => 'bit_count', prorettype => 'int8', proargtypes => 'bit',
  prosrc => 'bit_bit_count' },
# for macaddr type support
{ oid => '436', descr => 'I/O',

View File

@ -710,6 +710,19 @@ SELECT overlay(B'0101011100' placing '001' from 20);
0101011100001
(1 row)
-- bit_count
SELECT bit_count(B'0101011100'::bit(10));
bit_count
-----------
5
(1 row)
SELECT bit_count(B'1111111111'::bit(10));
bit_count
-----------
10
(1 row)
-- This table is intentionally left around to exercise pg_dump/pg_upgrade
CREATE TABLE bit_defaults(
b1 bit(4) DEFAULT '1001',

View File

@ -2227,3 +2227,9 @@ SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 5
Th\000o\x02\x03
(1 row)
SELECT bit_count('\x1234567890'::bytea);
bit_count
-----------
31
(1 row)

View File

@ -215,6 +215,10 @@ SELECT overlay(B'0101011100' placing '101' from 6);
SELECT overlay(B'0101011100' placing '001' from 11);
SELECT overlay(B'0101011100' placing '001' from 20);
-- bit_count
SELECT bit_count(B'0101011100'::bit(10));
SELECT bit_count(B'1111111111'::bit(10));
-- This table is intentionally left around to exercise pg_dump/pg_upgrade
CREATE TABLE bit_defaults(
b1 bit(4) DEFAULT '1001',

View File

@ -742,3 +742,5 @@ SELECT btrim(E'\\000trim\\000'::bytea, ''::bytea);
SELECT encode(overlay(E'Th\\000omas'::bytea placing E'Th\\001omas'::bytea from 2),'escape');
SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 8),'escape');
SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 5 for 3),'escape');
SELECT bit_count('\x1234567890'::bytea);