Extend log sanitization to cover GitHub, GitLab, and AWS credentials and generic URL passwords
Consolidate all PII/secret sanitization into remove_iip() to protect real-time logs in addition to exported logs. Add detection for GitHub tokens (ghp_, gho_, ghu_, ghs_, ghr_), GitLab tokens (glpat-, glcbt-, glrt-), AWS credentials (AKIA/ABIA/ACCA/ASIA access keys and secret keys), and generic URL passwords for FTP, SSH, AMQP, LDAP, and S3 protocols. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
96f2e81191
commit
5e3593e8bf
5 changed files with 144 additions and 27 deletions
|
|
@ -269,9 +269,41 @@ function remove_iip($text)
|
|||
// Ensure the input is valid UTF-8 before processing
|
||||
$text = sanitize_utf8_text($text);
|
||||
|
||||
// Git access tokens
|
||||
$text = preg_replace('/x-access-token:.*?(?=@)/', 'x-access-token:'.REDACTED, $text);
|
||||
|
||||
return preg_replace('/\x1b\[[0-9;]*m/', '', $text);
|
||||
// ANSI color codes
|
||||
$text = preg_replace('/\x1b\[[0-9;]*m/', '', $text);
|
||||
|
||||
// Generic URLs with passwords (covers database URLs, ftp, amqp, ssh, etc.)
|
||||
// (protocol://user:password@host → protocol://user:<REDACTED>@host)
|
||||
$text = preg_replace('/((?:postgres|mysql|mongodb|rediss?|mariadb|ftp|sftp|ssh|amqp|amqps|ldap|ldaps|s3):\/\/[^:]+:)[^@]+(@)/i', '$1'.REDACTED.'$2', $text);
|
||||
|
||||
// Email addresses
|
||||
$text = preg_replace('/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/', REDACTED, $text);
|
||||
|
||||
// Bearer/JWT tokens
|
||||
$text = preg_replace('/Bearer\s+[A-Za-z0-9\-_]+\.[A-Za-z0-9\-_]+\.[A-Za-z0-9\-_]+/i', 'Bearer '.REDACTED, $text);
|
||||
|
||||
// GitHub tokens (ghp_ = personal, gho_ = OAuth, ghu_ = user-to-server, ghs_ = server-to-server, ghr_ = refresh)
|
||||
$text = preg_replace('/\b(gh[pousr]_[A-Za-z0-9_]{36,})\b/', REDACTED, $text);
|
||||
|
||||
// GitLab tokens (glpat- = personal access token, glcbt- = CI build token, glrt- = runner token)
|
||||
$text = preg_replace('/\b(gl(?:pat|cbt|rt)-[A-Za-z0-9\-_]{20,})\b/', REDACTED, $text);
|
||||
|
||||
// AWS credentials (Access Key ID starts with AKIA, ABIA, ACCA, ASIA)
|
||||
$text = preg_replace('/\b(A(?:KIA|BIA|CCA|SIA)[A-Z0-9]{16})\b/', REDACTED, $text);
|
||||
|
||||
// AWS Secret Access Key (40 character base64-ish string, typically follows access key)
|
||||
$text = preg_replace('/(aws_secret_access_key|AWS_SECRET_ACCESS_KEY)[=:]\s*[\'"]?([A-Za-z0-9\/+=]{40})[\'"]?/i', '$1='.REDACTED, $text);
|
||||
|
||||
// API keys (common patterns)
|
||||
$text = preg_replace('/(api[_-]?key|apikey|api[_-]?secret|secret[_-]?key)[=:]\s*[\'"]?[A-Za-z0-9\-_]{16,}[\'"]?/i', '$1='.REDACTED, $text);
|
||||
|
||||
// Private key blocks
|
||||
$text = preg_replace('/-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----/', REDACTED, $text);
|
||||
|
||||
return $text;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -674,26 +674,8 @@ function removeAnsiColors($text)
|
|||
|
||||
function sanitizeLogsForExport(string $text): string
|
||||
{
|
||||
// Use existing helper for tokens and ANSI codes
|
||||
$text = remove_iip($text);
|
||||
|
||||
// Database URLs with passwords - must run before email regex to prevent false matches
|
||||
// (postgres://user:password@host → postgres://user:<REDACTED>@host)
|
||||
$text = preg_replace('/((?:postgres|mysql|mongodb|rediss?|mariadb):\/\/[^:]+:)[^@]+(@)/i', '$1'.REDACTED.'$2', $text);
|
||||
|
||||
// Email addresses
|
||||
$text = preg_replace('/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/', REDACTED, $text);
|
||||
|
||||
// Bearer/JWT tokens
|
||||
$text = preg_replace('/Bearer\s+[A-Za-z0-9\-_]+\.[A-Za-z0-9\-_]+\.[A-Za-z0-9\-_]+/i', 'Bearer '.REDACTED, $text);
|
||||
|
||||
// API keys (common patterns)
|
||||
$text = preg_replace('/(api[_-]?key|apikey|api[_-]?secret|secret[_-]?key)[=:]\s*[\'"]?[A-Za-z0-9\-_]{16,}[\'"]?/i', '$1='.REDACTED, $text);
|
||||
|
||||
// Private key blocks
|
||||
$text = preg_replace('/-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----/', REDACTED, $text);
|
||||
|
||||
return $text;
|
||||
// All sanitization is now handled by remove_iip()
|
||||
return remove_iip($text);
|
||||
}
|
||||
|
||||
function getTopLevelNetworks(Service|Application $resource)
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
|
@ -101,3 +101,70 @@
|
|||
expect($result)->not->toContain('12345678901234567890');
|
||||
expect($result)->toContain(REDACTED);
|
||||
});
|
||||
|
||||
// Verifies that every GitHub token prefix is redacted by sanitizeLogsForExport().
// Each prefix is a named dataset entry so a failure reports which token type leaked.
// The description is deliberately NOT passed to toContain(): that expectation is variadic,
// so an extra string would be treated as a second needle and make the negated
// expectation pass even when the token is still present.
it('removes GitHub tokens', function (string $token) {
    $result = sanitizeLogsForExport("Token: {$token}");

    expect($result)->not->toContain($token);
    expect($result)->toContain(REDACTED);
})->with([
    'ghp_ personal access token' => ['ghp_aBcDeFgHiJkLmNoPqRsTuVwXyZ0123456789'],
    'gho_ OAuth token' => ['gho_aBcDeFgHiJkLmNoPqRsTuVwXyZ0123456789'],
    'ghu_ user-to-server token' => ['ghu_aBcDeFgHiJkLmNoPqRsTuVwXyZ0123456789'],
    'ghs_ server-to-server token' => ['ghs_aBcDeFgHiJkLmNoPqRsTuVwXyZ0123456789'],
    'ghr_ refresh token' => ['ghr_aBcDeFgHiJkLmNoPqRsTuVwXyZ0123456789'],
]);
|
||||
|
||||
// Verifies that every GitLab token prefix is redacted by sanitizeLogsForExport().
// Each prefix is a named dataset entry so a failure reports which token type leaked.
// The description is deliberately NOT passed to toContain(): that expectation is variadic,
// so an extra string would be treated as a second needle and make the negated
// expectation pass even when the token is still present.
it('removes GitLab tokens', function (string $token) {
    $result = sanitizeLogsForExport("Token: {$token}");

    expect($result)->not->toContain($token);
    expect($result)->toContain(REDACTED);
})->with([
    'glpat- personal access token' => ['glpat-aBcDeFgHiJkLmNoPqRsTu'],
    'glcbt- CI build token' => ['glcbt-aBcDeFgHiJkLmNoPqRsTu'],
    'glrt- runner token' => ['glrt-aBcDeFgHiJkLmNoPqRsTuV'],
]);
|
||||
|
||||
// An AWS Access Key ID (AKIA/ABIA/ACCA/ASIA prefix) must not survive sanitization.
it('removes AWS credentials', function () {
    $keyId = 'AKIAIOSFODNN7EXAMPLE';

    $sanitized = sanitizeLogsForExport('AWS_ACCESS_KEY_ID='.$keyId);

    // The raw key is gone and the placeholder took its place.
    expect($sanitized)->not->toContain($keyId);
    expect($sanitized)->toContain(REDACTED);
});
|
||||
|
||||
// A 40-character AWS secret assigned to aws_secret_access_key must be replaced,
// while the variable name itself stays visible in the output.
it('removes AWS secret access key', function () {
    $secret = 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY';

    $sanitized = sanitizeLogsForExport('aws_secret_access_key='.$secret);

    expect($sanitized)->not->toContain($secret);
    expect($sanitized)->toContain('aws_secret_access_key='.REDACTED);
});
|
||||
|
||||
// For each non-database protocol, only the password part of scheme://user:password@host
// is replaced; scheme, user, host, port and path must come back unchanged.
it('removes generic URL passwords', function () {
    $expectedByUrl = [
        'ftp://user:ftppass@ftp.example.com/path' => 'ftp://user:'.REDACTED.'@ftp.example.com/path',
        'sftp://deploy:secret123@sftp.example.com' => 'sftp://deploy:'.REDACTED.'@sftp.example.com',
        'ssh://git:sshpass@git.example.com/repo' => 'ssh://git:'.REDACTED.'@git.example.com/repo',
        'amqp://rabbit:bunny123@rabbitmq:5672' => 'amqp://rabbit:'.REDACTED.'@rabbitmq:5672',
        'ldap://admin:ldappass@ldap.example.com' => 'ldap://admin:'.REDACTED.'@ldap.example.com',
        's3://access:secretkey@bucket.s3.amazonaws.com' => 's3://access:'.REDACTED.'@bucket.s3.amazonaws.com',
    ];

    foreach ($expectedByUrl as $url => $redactedUrl) {
        expect(sanitizeLogsForExport($url))->toBe($redactedUrl);
    }
});
|
||||
|
|
|
|||
Loading…
Reference in a new issue