Enhance log sanitization to cover GitHub, GitLab, and AWS credentials and generic URL passwords

Consolidate all PII/secret sanitization into remove_iip() to protect real-time logs in addition to exported logs. Add detection for GitHub tokens (ghp_, gho_, ghu_, ghs_, ghr_), GitLab tokens (glpat-, glcbt-, glrt-), AWS credentials (AKIA/ABIA/ACCA/ASIA access keys and secret keys), and generic URL passwords for FTP, SSH, AMQP, LDAP, and S3 protocols.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
Andras Bacsai 2025-12-17 17:59:10 +01:00
parent 96f2e81191
commit 5e3593e8bf
5 changed files with 144 additions and 27 deletions

View file

@ -269,9 +269,41 @@ function remove_iip($text)
// Ensure the input is valid UTF-8 before processing
$text = sanitize_utf8_text($text);
// Git access tokens
$text = preg_replace('/x-access-token:.*?(?=@)/', 'x-access-token:'.REDACTED, $text);
return preg_replace('/\x1b\[[0-9;]*m/', '', $text);
// ANSI color codes
$text = preg_replace('/\x1b\[[0-9;]*m/', '', $text);
// Generic URLs with passwords (covers database URLs, ftp, amqp, ssh, etc.)
// (protocol://user:password@host → protocol://user:<REDACTED>@host)
$text = preg_replace('/((?:postgres|mysql|mongodb|rediss?|mariadb|ftp|sftp|ssh|amqp|amqps|ldap|ldaps|s3):\/\/[^:]+:)[^@]+(@)/i', '$1'.REDACTED.'$2', $text);
// Email addresses
$text = preg_replace('/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/', REDACTED, $text);
// Bearer/JWT tokens
$text = preg_replace('/Bearer\s+[A-Za-z0-9\-_]+\.[A-Za-z0-9\-_]+\.[A-Za-z0-9\-_]+/i', 'Bearer '.REDACTED, $text);
// GitHub tokens (ghp_ = personal, gho_ = OAuth, ghu_ = user-to-server, ghs_ = server-to-server, ghr_ = refresh)
$text = preg_replace('/\b(gh[pousr]_[A-Za-z0-9_]{36,})\b/', REDACTED, $text);
// GitLab tokens (glpat- = personal access token, glcbt- = CI build token, glrt- = runner token)
$text = preg_replace('/\b(gl(?:pat|cbt|rt)-[A-Za-z0-9\-_]{20,})\b/', REDACTED, $text);
// AWS credentials (Access Key ID starts with AKIA, ABIA, ACCA, ASIA)
$text = preg_replace('/\b(A(?:KIA|BIA|CCA|SIA)[A-Z0-9]{16})\b/', REDACTED, $text);
// AWS Secret Access Key (40 character base64-ish string, typically follows access key)
$text = preg_replace('/(aws_secret_access_key|AWS_SECRET_ACCESS_KEY)[=:]\s*[\'"]?([A-Za-z0-9\/+=]{40})[\'"]?/i', '$1='.REDACTED, $text);
// API keys (common patterns)
$text = preg_replace('/(api[_-]?key|apikey|api[_-]?secret|secret[_-]?key)[=:]\s*[\'"]?[A-Za-z0-9\-_]{16,}[\'"]?/i', '$1='.REDACTED, $text);
// Private key blocks
$text = preg_replace('/-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----/', REDACTED, $text);
return $text;
}
/**

View file

@ -674,26 +674,8 @@ function removeAnsiColors($text)
/**
 * Sanitize log text before it is exported.
 *
 * Passes the text through remove_iip() and replaces matches of the
 * redaction patterns below with the REDACTED constant.
 *
 * NOTE(review): this listing appears to interleave the removed and the added
 * lines of a diff. As written, everything after the first `return $text;` is
 * unreachable; the trailing `return remove_iip($text);` looks like the
 * intended replacement for the whole body — confirm against the real file.
 *
 * @param  string  $text  Log text to sanitize.
 * @return string  The text with matched secrets/PII replaced.
 */
function sanitizeLogsForExport(string $text): string
{
// Use existing helper for tokens and ANSI codes
$text = remove_iip($text);
// Database URLs with passwords - must run before email regex to prevent false matches
// (postgres://user:password@host → postgres://user:<REDACTED>@host)
$text = preg_replace('/((?:postgres|mysql|mongodb|rediss?|mariadb):\/\/[^:]+:)[^@]+(@)/i', '$1'.REDACTED.'$2', $text);
// Email addresses
$text = preg_replace('/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/', REDACTED, $text);
// Bearer/JWT tokens
$text = preg_replace('/Bearer\s+[A-Za-z0-9\-_]+\.[A-Za-z0-9\-_]+\.[A-Za-z0-9\-_]+/i', 'Bearer '.REDACTED, $text);
// API keys (common patterns)
$text = preg_replace('/(api[_-]?key|apikey|api[_-]?secret|secret[_-]?key)[=:]\s*[\'"]?[A-Za-z0-9\-_]{16,}[\'"]?/i', '$1='.REDACTED, $text);
// Private key blocks
$text = preg_replace('/-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----/', REDACTED, $text);
return $text;
// All sanitization is now handled by remove_iip()
return remove_iip($text);
}
function getTopLevelNetworks(Service|Application $resource)

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -101,3 +101,70 @@
expect($result)->not->toContain('12345678901234567890');
expect($result)->toContain(REDACTED);
});
// Verifies that each GitHub token prefix (ghp_, gho_, ghu_, ghs_, ghr_) is
// redacted from the text returned by sanitizeLogsForExport().
it('removes GitHub tokens', function () {
    // token => human-readable description used in the failure message
    $testCases = [
        'ghp_aBcDeFgHiJkLmNoPqRsTuVwXyZ0123456789' => 'ghp_ personal access token',
        'gho_aBcDeFgHiJkLmNoPqRsTuVwXyZ0123456789' => 'gho_ OAuth token',
        'ghu_aBcDeFgHiJkLmNoPqRsTuVwXyZ0123456789' => 'ghu_ user-to-server token',
        'ghs_aBcDeFgHiJkLmNoPqRsTuVwXyZ0123456789' => 'ghs_ server-to-server token',
        'ghr_aBcDeFgHiJkLmNoPqRsTuVwXyZ0123456789' => 'ghr_ refresh token',
    ];

    foreach ($testCases as $token => $description) {
        $input = "Token: {$token}";

        $result = sanitizeLogsForExport($input);

        // toContain() is variadic: a second argument is treated as another
        // needle, not as a failure message. Combined with ->not, the message
        // needle is never found, so the negated check would pass even when
        // the token is still present. Assert on str_contains() instead and
        // hand the description to toBeFalse().
        expect(str_contains($result, $token))->toBeFalse("Failed to redact {$description}");
        expect($result)->toContain(REDACTED);
    }
});
// Verifies that each GitLab token prefix (glpat-, glcbt-, glrt-) is redacted
// from the text returned by sanitizeLogsForExport().
it('removes GitLab tokens', function () {
    // token => human-readable description used in the failure message
    $testCases = [
        'glpat-aBcDeFgHiJkLmNoPqRsTu' => 'glpat- personal access token',
        'glcbt-aBcDeFgHiJkLmNoPqRsTu' => 'glcbt- CI build token',
        'glrt-aBcDeFgHiJkLmNoPqRsTuV' => 'glrt- runner token',
    ];

    foreach ($testCases as $token => $description) {
        $input = "Token: {$token}";

        $result = sanitizeLogsForExport($input);

        // toContain() is variadic: a second argument is treated as another
        // needle, not as a failure message. Combined with ->not, the message
        // needle is never found, so the negated check would pass even when
        // the token is still present. Assert on str_contains() instead and
        // hand the description to toBeFalse().
        expect(str_contains($result, $token))->toBeFalse("Failed to redact {$description}");
        expect($result)->toContain(REDACTED);
    }
});
// Checks that an AWS Access Key ID assigned in an env-style line is redacted.
it('removes AWS credentials', function () {
    // Sample Access Key ID with the AKIA prefix (ABIA, ACCA and ASIA are the other prefixes)
    $keyId = 'AKIAIOSFODNN7EXAMPLE';

    $sanitized = sanitizeLogsForExport('AWS_ACCESS_KEY_ID='.$keyId);

    // The raw key must be gone and the placeholder must be present.
    expect($sanitized)->not->toContain($keyId);
    expect($sanitized)->toContain(REDACTED);
});
// Checks that the value of an aws_secret_access_key assignment is redacted
// while the key name itself is preserved.
it('removes AWS secret access key', function () {
    $secret = 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY';

    $sanitized = sanitizeLogsForExport('aws_secret_access_key='.$secret);

    expect($sanitized)->not->toContain($secret);
    // The key name stays, followed directly by the placeholder.
    expect($sanitized)->toContain('aws_secret_access_key='.REDACTED);
});
// Checks that the password part of protocol://user:password@host URLs is
// replaced by the placeholder for each listed protocol, leaving the rest intact.
it('removes generic URL passwords', function () {
    // raw URL => expected sanitized URL
    $urlExpectations = [
        'ftp://user:ftppass@ftp.example.com/path' => 'ftp://user:'.REDACTED.'@ftp.example.com/path',
        'sftp://deploy:secret123@sftp.example.com' => 'sftp://deploy:'.REDACTED.'@sftp.example.com',
        'ssh://git:sshpass@git.example.com/repo' => 'ssh://git:'.REDACTED.'@git.example.com/repo',
        'amqp://rabbit:bunny123@rabbitmq:5672' => 'amqp://rabbit:'.REDACTED.'@rabbitmq:5672',
        'ldap://admin:ldappass@ldap.example.com' => 'ldap://admin:'.REDACTED.'@ldap.example.com',
        's3://access:secretkey@bucket.s3.amazonaws.com' => 's3://access:'.REDACTED.'@bucket.s3.amazonaws.com',
    ];

    foreach ($urlExpectations as $rawUrl => $sanitizedUrl) {
        expect(sanitizeLogsForExport($rawUrl))->toBe($sanitizedUrl);
    }
});