From 6f2e7c009c24830d4f08633bfbde3b75f40bf215 Mon Sep 17 00:00:00 2001 From: Benjamin Bellamy Date: Wed, 30 Mar 2022 16:09:06 +0000 Subject: [PATCH] feat(analytics): add current date and secret salt to analytics hash for improved privacy --- app/Helpers/misc_helper.php | 18 +++++++ modules/Analytics/Config/Analytics.php | 13 +++++ .../Analytics/Helpers/analytics_helper.php | 48 +++++++++++-------- .../Install/Controllers/InstallController.php | 6 ++- 4 files changed, 64 insertions(+), 21 deletions(-) diff --git a/app/Helpers/misc_helper.php b/app/Helpers/misc_helper.php index bc3b66d9..f4b0a602 100644 --- a/app/Helpers/misc_helper.php +++ b/app/Helpers/misc_helper.php @@ -213,6 +213,24 @@ if (! function_exists('podcast_uuid')) { //-------------------------------------------------------------------- +if (! function_exists('generate_random_salt')) { + function generate_random_salt(int $length = 64): string + { + $salt = ''; + while (strlen($salt) < $length) { + $charNumber = random_int(33, 126); + // Exclude " ' \ ` + if (! in_array($charNumber, [34, 39, 92, 96], true)) { + $salt .= chr($charNumber); + } + } + + return $salt; + } +} + +//-------------------------------------------------------------------- + if (! function_exists('file_upload_max_size')) { diff --git a/modules/Analytics/Config/Analytics.php b/modules/Analytics/Config/Analytics.php index 578d8262..ab263a08 100644 --- a/modules/Analytics/Config/Analytics.php +++ b/modules/Analytics/Config/Analytics.php @@ -25,6 +25,19 @@ class Analytics extends BaseConfig 'analytics-filtered-data' => 'permission:podcasts-view,podcast-view', ]; + /** + * -------------------------------------------------------------------------- + * Secret Salt + * -------------------------------------------------------------------------- + * + * The secret salt is a string of random characters that is used when hashing data. + * Each Castopod instance has its own secret salt so keys will never be the same. + * + * Example: + * Z&|qECKBrwgaaD>~;U/tXG1U%tSe_oi5Tzy)h>}5NC2npSrjvM0w_Q>cs=0o=H]* + */ + public string $salt = ''; + /** * get the full audio file url * diff --git a/modules/Analytics/Helpers/analytics_helper.php b/modules/Analytics/Helpers/analytics_helper.php index c2dd24d8..5a684c36 100644 --- a/modules/Analytics/Helpers/analytics_helper.php +++ b/modules/Analytics/Helpers/analytics_helper.php @@ -241,12 +241,12 @@ if (! function_exists('podcast_hit')) { * Counting podcast episode downloads for analytic purposes ✅ No IP address is ever stored on the server. ✅ Only * aggregate data is stored in the database. We follow IAB Podcast Measurement Technical Guidelines Version 2.0: * https://iabtechlab.com/standards/podcast-measurement-guidelines/ - * https://iabtechlab.com/wp-content/uploads/2017/12/Podcast_Measurement_v2-Dec-20-2017.pdf ✅ Rolling 24-hour - * window ✅ Castopod does not do pre-load ✅ IP deny list https://github.com/client9/ipcat ✅ User-agent - * Filtering https://github.com/opawg/user-agents ✅ RSS User-agent https://github.com/opawg/podcast-rss-useragents - * ✅ Ignores 2 bytes range "Range: 0-1" (performed by official Apple iOS Podcast app) ✅ In case of partial - * content, adds up all requests to check >1mn was downloaded ✅ Identifying Uniques is done with a combination of - * IP Address and User Agent + * https://iabtechlab.com/wp-content/uploads/2017/12/Podcast_Measurement_v2-Dec-20-2017.pdf ✅ 24-hour window ✅ + * Castopod does not do pre-load ✅ IP deny list https://github.com/client9/ipcat ✅ User-agent Filtering + * https://github.com/opawg/user-agents ✅ RSS User-agent https://github.com/opawg/podcast-rss-useragents ✅ + * Ignores 2 bytes range "Range: 0-1" (performed by official Apple iOS Podcast app) ✅ In case of partial content, + * adds up all requests to check >1mn was downloaded ✅ Identifying Uniques is done with a combination of IP + * Address and User Agent * * @param integer $podcastId The podcast ID * @param integer $episodeId The Episode ID @@ -280,19 +280,25 @@ if (! function_exists('podcast_hit')) { ? $_SERVER['HTTP_RANGE'] : null; - // We create a sha1 hash for this IP_Address+User_Agent+Episode_ID (used to count only once multiple episode downloads): - $episodeHashId = - '_IpUaEp_' . - sha1($_SERVER['REMOTE_ADDR'] . '_' . $_SERVER['HTTP_USER_AGENT'] . '_' . $episodeId); + $salt = config('Analytics') + ->salt; + // We create a sha1 hash for this Salt+Current_Date+IP_Address+User_Agent+Episode_ID (used to count only once multiple episode downloads): + $episodeListenerHashId = + 'Analytics_Episode_' . + sha1( + $salt . '_' . date( + 'Y-m-d' + ) . '_' . $_SERVER['REMOTE_ADDR'] . '_' . $_SERVER['HTTP_USER_AGENT'] . '_' . $episodeId + ); // Was this episode downloaded in the past 24h: - $downloadedBytes = cache($episodeHashId); + $downloadedBytes = cache($episodeListenerHashId); // Rolling window is 24 hours (86400 seconds): $rollingTTL = 86400; if ($downloadedBytes) { // In case it was already downloaded, TTL should be adjusted (rolling window is 24h since 1st download): $rollingTTL = cache() - ->getMetadata($episodeHashId)['expire'] - time(); + ->getMetadata($episodeListenerHashId)['expire'] - time(); } else { // If it was never downloaded that means that zero byte were downloaded: $downloadedBytes = 0; @@ -320,7 +326,7 @@ if (! function_exists('podcast_hit')) { // We save the number of downloaded bytes for this user and this episode: cache() - ->save($episodeHashId, $downloadedBytes, $rollingTTL); + ->save($episodeListenerHashId, $downloadedBytes, $rollingTTL); // If more that 1mn was downloaded, that's a hit, we send that to the database: if ($downloadedBytes >= $bytesThreshold) { @@ -329,13 +335,17 @@ if (! function_exists('podcast_hit')) { $age = intdiv(time() - $publicationTime, 86400); - // We create a sha1 hash for this IP_Address+User_Agent+Podcast_ID (used to count unique listeners): - $listenerHashId = - '_IpUaPo_' . - sha1($_SERVER['REMOTE_ADDR'] . '_' . $_SERVER['HTTP_USER_AGENT'] . '_' . $podcastId); + // We create a sha1 hash for this Salt+Current_Date+IP_Address+User_Agent+Podcast_ID (used to count unique listeners): + $podcastListenerHashId = + 'Analytics_Podcast_' . + sha1( + $salt . '_' . date( + 'Y-m-d' + ) . '_' . $_SERVER['REMOTE_ADDR'] . '_' . $_SERVER['HTTP_USER_AGENT'] . '_' . $podcastId + ); $newListener = 1; // Has this listener already downloaded an episode today: - $downloadsByUser = cache($listenerHashId); + $downloadsByUser = cache($podcastListenerHashId); // We add one download if ($downloadsByUser) { $newListener = 0; @@ -348,7 +358,7 @@ if (! function_exists('podcast_hit')) { $midnightTTL = strtotime('tomorrow') - time(); // We save the download count for this user until midnight: cache() - ->save($listenerHashId, $downloadsByUser, $midnightTTL); + ->save($podcastListenerHashId, $downloadsByUser, $midnightTTL); $db->query( "CALL {$procedureName}(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?);", diff --git a/modules/Install/Controllers/InstallController.php b/modules/Install/Controllers/InstallController.php index 34209a34..769fb90f 100644 --- a/modules/Install/Controllers/InstallController.php +++ b/modules/Install/Controllers/InstallController.php @@ -31,7 +31,7 @@ class InstallController extends Controller /** * @var string[] */ - protected $helpers = ['form', 'components', 'svg']; + protected $helpers = ['form', 'components', 'svg', 'misc']; /** * Constructor. @@ -72,7 +72,7 @@ class InstallController extends Controller // Check if the created .env file is writable to continue install process if (is_really_writable(ROOTPATH . '.env')) { try { - $dotenv->required(['app.baseURL', 'admin.gateway', 'auth.gateway']); + $dotenv->required(['app.baseURL', 'analytics.salt', 'admin.gateway', 'auth.gateway']); } catch (ValidationException) { // form to input instance configuration return $this->instanceConfig(); @@ -99,6 +99,7 @@ class InstallController extends Controller try { $dotenv->required([ 'app.baseURL', + 'analytics.salt', 'admin.gateway', 'auth.gateway', 'database.default.hostname', @@ -169,6 +170,7 @@ class InstallController extends Controller 'app.baseURL' => $baseUrl, 'app.mediaBaseURL' => $mediaBaseUrl === '' ? $baseUrl : $mediaBaseUrl, + 'analytics.salt' => generate_random_salt(64), 'admin.gateway' => $this->request->getPost('admin_gateway'), 'auth.gateway' => $this->request->getPost('auth_gateway'), ]);