fix(metrics): address code review feedback for LTTB downsampling
- Wrap return values in collect() to maintain Collection compatibility - Add comment explaining threshold <= 2 prevents division by zero - Refactor tests to use actual Server model method via reflection - Use seeded mt_rand() for reproducible test results 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
f199b6bfc4
commit
0e9dbc3625
2 changed files with 28 additions and 78 deletions
|
|
@ -691,7 +691,7 @@ public function getCpuMetrics(int $mins = 5)
|
|||
$metrics = $this->downsampleLTTB($metrics, 1000);
|
||||
}
|
||||
|
||||
return $metrics;
|
||||
return collect($metrics);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -720,7 +720,7 @@ public function getMemoryMetrics(int $mins = 5)
|
|||
$metrics = $this->downsampleLTTB($metrics, 1000);
|
||||
}
|
||||
|
||||
return $metrics;
|
||||
return collect($metrics);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -736,6 +736,8 @@ private function downsampleLTTB(array $data, int $threshold): array
|
|||
{
|
||||
$dataLength = count($data);
|
||||
|
||||
// Return unchanged if threshold >= data length, or if threshold <= 2
|
||||
// (threshold <= 2 would cause division by zero in bucket calculation)
|
||||
if ($threshold >= $dataLength || $threshold <= 2) {
|
||||
return $data;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,75 +1,17 @@
|
|||
<?php
|
||||
|
||||
use App\Models\Server;
|
||||
|
||||
/**
|
||||
* LTTB (Largest-Triangle-Three-Buckets) algorithm implementation for testing.
|
||||
* This mirrors the implementation in App\Models\Server::downsampleLTTB()
|
||||
* Helper to call the private downsampleLTTB method on Server model via reflection.
|
||||
*/
|
||||
function downsampleLTTB(array $data, int $threshold): array
|
||||
function callDownsampleLTTB(array $data, int $threshold): array
|
||||
{
|
||||
$dataLength = count($data);
|
||||
$server = new Server;
|
||||
$reflection = new ReflectionClass($server);
|
||||
$method = $reflection->getMethod('downsampleLTTB');
|
||||
|
||||
if ($threshold >= $dataLength || $threshold <= 2) {
|
||||
return $data;
|
||||
}
|
||||
|
||||
$sampled = [];
|
||||
$sampled[] = $data[0]; // Always keep first point
|
||||
|
||||
$bucketSize = ($dataLength - 2) / ($threshold - 2);
|
||||
|
||||
$a = 0; // Index of previous selected point
|
||||
|
||||
for ($i = 0; $i < $threshold - 2; $i++) {
|
||||
// Calculate bucket range
|
||||
$bucketStart = (int) floor(($i + 1) * $bucketSize) + 1;
|
||||
$bucketEnd = (int) floor(($i + 2) * $bucketSize) + 1;
|
||||
$bucketEnd = min($bucketEnd, $dataLength - 1);
|
||||
|
||||
// Calculate average point for next bucket (used as reference)
|
||||
$nextBucketStart = (int) floor(($i + 2) * $bucketSize) + 1;
|
||||
$nextBucketEnd = (int) floor(($i + 3) * $bucketSize) + 1;
|
||||
$nextBucketEnd = min($nextBucketEnd, $dataLength - 1);
|
||||
|
||||
$avgX = 0;
|
||||
$avgY = 0;
|
||||
$nextBucketCount = $nextBucketEnd - $nextBucketStart + 1;
|
||||
|
||||
if ($nextBucketCount > 0) {
|
||||
for ($j = $nextBucketStart; $j <= $nextBucketEnd; $j++) {
|
||||
$avgX += $data[$j][0];
|
||||
$avgY += $data[$j][1];
|
||||
}
|
||||
$avgX /= $nextBucketCount;
|
||||
$avgY /= $nextBucketCount;
|
||||
}
|
||||
|
||||
// Find point in current bucket with largest triangle area
|
||||
$maxArea = -1;
|
||||
$maxAreaIndex = $bucketStart;
|
||||
|
||||
$pointAX = $data[$a][0];
|
||||
$pointAY = $data[$a][1];
|
||||
|
||||
for ($j = $bucketStart; $j <= $bucketEnd; $j++) {
|
||||
// Triangle area calculation
|
||||
$area = abs(
|
||||
($pointAX - $avgX) * ($data[$j][1] - $pointAY) -
|
||||
($pointAX - $data[$j][0]) * ($avgY - $pointAY)
|
||||
) * 0.5;
|
||||
|
||||
if ($area > $maxArea) {
|
||||
$maxArea = $area;
|
||||
$maxAreaIndex = $j;
|
||||
}
|
||||
}
|
||||
|
||||
$sampled[] = $data[$maxAreaIndex];
|
||||
$a = $maxAreaIndex;
|
||||
}
|
||||
|
||||
$sampled[] = $data[$dataLength - 1]; // Always keep last point
|
||||
|
||||
return $sampled;
|
||||
return $method->invoke($server, $data, $threshold);
|
||||
}
|
||||
|
||||
it('returns data unchanged when below threshold', function () {
|
||||
|
|
@ -79,7 +21,7 @@ function downsampleLTTB(array $data, int $threshold): array
|
|||
[3000, 15.7],
|
||||
];
|
||||
|
||||
$result = downsampleLTTB($data, 1000);
|
||||
$result = callDownsampleLTTB($data, 1000);
|
||||
|
||||
expect($result)->toBe($data);
|
||||
});
|
||||
|
|
@ -93,21 +35,24 @@ function downsampleLTTB(array $data, int $threshold): array
|
|||
[5000, 12.0],
|
||||
];
|
||||
|
||||
$result = downsampleLTTB($data, 2);
|
||||
$result = callDownsampleLTTB($data, 2);
|
||||
expect($result)->toBe($data);
|
||||
|
||||
$result = downsampleLTTB($data, 1);
|
||||
$result = callDownsampleLTTB($data, 1);
|
||||
expect($result)->toBe($data);
|
||||
});
|
||||
|
||||
it('downsamples to target threshold count', function () {
|
||||
// Seed for reproducibility
|
||||
mt_srand(42);
|
||||
|
||||
// Generate 100 data points
|
||||
$data = [];
|
||||
for ($i = 0; $i < 100; $i++) {
|
||||
$data[] = [$i * 1000, rand(0, 100) / 10];
|
||||
$data[] = [$i * 1000, mt_rand(0, 100) / 10];
|
||||
}
|
||||
|
||||
$result = downsampleLTTB($data, 10);
|
||||
$result = callDownsampleLTTB($data, 10);
|
||||
|
||||
expect(count($result))->toBe(10);
|
||||
});
|
||||
|
|
@ -118,7 +63,7 @@ function downsampleLTTB(array $data, int $threshold): array
|
|||
$data[] = [$i * 1000, $i * 1.5];
|
||||
}
|
||||
|
||||
$result = downsampleLTTB($data, 20);
|
||||
$result = callDownsampleLTTB($data, 20);
|
||||
|
||||
// First point should be preserved
|
||||
expect($result[0])->toBe($data[0]);
|
||||
|
|
@ -133,7 +78,7 @@ function downsampleLTTB(array $data, int $threshold): array
|
|||
$data[] = [$i * 60000, sin($i / 10) * 50 + 50]; // Sine wave pattern
|
||||
}
|
||||
|
||||
$result = downsampleLTTB($data, 50);
|
||||
$result = callDownsampleLTTB($data, 50);
|
||||
|
||||
// Verify all timestamps are in non-decreasing order
|
||||
$previousTimestamp = -1;
|
||||
|
|
@ -144,15 +89,18 @@ function downsampleLTTB(array $data, int $threshold): array
|
|||
});
|
||||
|
||||
it('handles large datasets efficiently', function () {
|
||||
// Seed for reproducibility
|
||||
mt_srand(123);
|
||||
|
||||
// Simulate 30 days of data at 5-second intervals (518,400 points)
|
||||
// For test purposes, use 10,000 points
|
||||
$data = [];
|
||||
for ($i = 0; $i < 10000; $i++) {
|
||||
$data[] = [$i * 5000, rand(0, 100)];
|
||||
$data[] = [$i * 5000, mt_rand(0, 100)];
|
||||
}
|
||||
|
||||
$startTime = microtime(true);
|
||||
$result = downsampleLTTB($data, 1000);
|
||||
$result = callDownsampleLTTB($data, 1000);
|
||||
$executionTime = microtime(true) - $startTime;
|
||||
|
||||
expect(count($result))->toBe(1000);
|
||||
|
|
@ -173,7 +121,7 @@ function downsampleLTTB(array $data, int $threshold): array
|
|||
$data[] = [$i * 1000, $value];
|
||||
}
|
||||
|
||||
$result = downsampleLTTB($data, 20);
|
||||
$result = callDownsampleLTTB($data, 20);
|
||||
|
||||
// The peak (100) and valley (0) should be preserved due to LTTB algorithm
|
||||
$values = array_column($result, 1);
|
||||
|
|
|
|||
Loading…
Reference in a new issue