Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 8 additions & 63 deletions app/components/Package/TrendsChart.vue
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import type {
} from '~/types/chart'
import { DATE_INPUT_MAX } from '~/utils/input'
import { applyDataCorrection } from '~/utils/chart-data-correction'
import { applyBlocklistCorrection, getAnomaliesForPackages } from '~/utils/download-anomalies'
import { applyHampelCorrection } from '~/utils/download-anomalies'
import { copyAltTextForTrendLineChart, sanitise, loadFile } from '~/utils/charts'

import('vue-data-ui/style.css')
Expand Down Expand Up @@ -964,13 +964,8 @@ const effectiveDataSingle = computed<EvolutionData>(() => {
}

if (isDownloadsMetric.value && data.length) {
const pkg = effectivePackageNames.value[0] ?? props.packageName ?? ''
if (settings.value.chartFilter.anomaliesFixed) {
data = applyBlocklistCorrection({
data,
packageName: pkg,
granularity: displayedGranularity.value,
})
data = applyHampelCorrection(data)
}

return applyDataCorrection(
Expand Down Expand Up @@ -1019,7 +1014,7 @@ const chartData = computed<{
let data = state.evolutionsByPackage[pkg] ?? []
if (isDownloadsMetric.value && data.length) {
if (settings.value.chartFilter.anomaliesFixed) {
data = applyBlocklistCorrection({ data, packageName: pkg, granularity })
data = applyHampelCorrection(data)
}
data = applyDataCorrection(
data as Array<{ value: number }>,
Expand Down Expand Up @@ -1681,20 +1676,6 @@ const chartConfig = computed<VueUiXyConfig>(() => {
const isDownloadsMetric = computed(() => selectedMetric.value === 'downloads')
const showCorrectionControls = shallowRef(false)

const packageAnomalies = computed(() => getAnomaliesForPackages(effectivePackageNames.value))
const hasAnomalies = computed(() => packageAnomalies.value.length > 0)

/**
 * Formats a `YYYY-MM-DD` string as a short, human-readable date in the
 * active locale, interpreting the date as UTC.
 *
 * @param dateStr - Date string whose parts are separated by `-`.
 * @returns The formatted date, or `dateStr` unchanged when any of the three
 *   parts is missing, zero, or not numeric.
 */
function formatAnomalyDate(dateStr: string) {
  const parts = dateStr.split('-').map(segment => Number(segment))
  const year = parts[0]
  const month = parts[1]
  const day = parts[2]
  if (!year || !month || !day) return dateStr

  const formatter = new Intl.DateTimeFormat(locale.value, {
    year: 'numeric',
    month: 'short',
    day: 'numeric',
    timeZone: 'UTC',
  })
  // Date.UTC takes a zero-based month, hence the `- 1`.
  const utcTimestamp = Date.UTC(year, month - 1, day)
  return formatter.format(new Date(utcTimestamp))
}

// Trigger data loading when the metric is switched
watch(selectedMetric, value => {
if (!isMounted.value) return
Expand Down Expand Up @@ -1831,64 +1812,28 @@ watch(selectedMetric, value => {
class="text-2xs font-mono text-fg-subtle tracking-wide uppercase flex items-center justify-between"
>
{{ $t('package.trends.known_anomalies') }}
<TooltipApp interactive :to="inModal ? '#chart-modal' : undefined">
<TooltipApp :to="inModal ? '#chart-modal' : undefined">
<button
type="button"
class="i-lucide:info w-3.5 h-3.5 text-fg-muted cursor-help"
:aria-label="$t('package.trends.known_anomalies')"
/>
<template #content>
<div class="flex flex-col gap-3">
<p class="text-xs text-fg-muted">
{{ $t('package.trends.known_anomalies_description') }}
</p>
<div v-if="hasAnomalies">
<p class="text-xs text-fg-subtle font-medium">
{{ $t('package.trends.known_anomalies_ranges') }}
</p>
<ul class="text-xs text-fg-subtle list-disc list-inside">
<li v-for="a in packageAnomalies" :key="`${a.packageName}-${a.start}`">
{{
isMultiPackageMode
? $t('package.trends.known_anomalies_range_named', {
packageName: a.packageName,
start: formatAnomalyDate(a.start),
end: formatAnomalyDate(a.end),
})
: $t('package.trends.known_anomalies_range', {
start: formatAnomalyDate(a.start),
end: formatAnomalyDate(a.end),
})
}}
</li>
</ul>
</div>
<p v-else class="text-xs text-fg-muted">
{{ $t('package.trends.known_anomalies_none', effectivePackageNames.length) }}
</p>
<div class="flex justify-end">
<LinkBase
to="https://github.com/npmx-dev/npmx.dev/edit/main/app/utils/download-anomalies.data.ts"
class="text-xs text-accent"
>
{{ $t('package.trends.known_anomalies_contribute') }}
</LinkBase>
</div>
</div>
<p class="text-xs text-fg-muted">
{{ $t('package.trends.known_anomalies_description') }}
</p>
</template>
</TooltipApp>
</span>
<label
class="flex items-center gap-1.5 text-2xs font-mono text-fg-subtle cursor-pointer"
:class="{ 'opacity-50 pointer-events-none': !hasAnomalies }"
>
<input
:checked="settings.chartFilter.anomaliesFixed && hasAnomalies"
:checked="settings.chartFilter.anomaliesFixed"
@change="
settings.chartFilter.anomaliesFixed = ($event.target as HTMLInputElement).checked
"
type="checkbox"
:disabled="!hasAnomalies"
class="accent-[var(--accent-color,var(--fg-subtle))]"
/>
{{ $t('package.trends.apply_correction') }}
Expand Down
8 changes: 2 additions & 6 deletions app/components/Package/WeeklyDownloadStats.vue
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { useCssVariables } from '~/composables/useColors'
import type { WeeklyDataPoint } from '~/types/chart'
import { applyDataCorrection } from '~/utils/chart-data-correction'
import { OKLCH_NEUTRAL_FALLBACK, lightenOklch } from '~/utils/colors'
import { applyBlocklistCorrection } from '~/utils/download-anomalies'
import { applyHampelCorrection } from '~/utils/download-anomalies'
import type { RepoRef } from '#shared/utils/git-providers'
import type { VueUiSparklineConfig, VueUiSparklineDatasetItem } from 'vue-data-ui'
import { onKeyDown } from '@vueuse/core'
Expand Down Expand Up @@ -186,11 +186,7 @@ const correctedDownloads = computed<WeeklyDataPoint[]>(() => {
let data = weeklyDownloads.value as WeeklyDataPoint[]
if (!data.length) return data
if (settings.value.chartFilter.anomaliesFixed) {
data = applyBlocklistCorrection({
data,
packageName: props.packageName,
granularity: 'weekly',
}) as WeeklyDataPoint[]
data = applyHampelCorrection(data) as WeeklyDataPoint[]
}
data = applyDataCorrection(data, settings.value.chartFilter) as WeeklyDataPoint[]
return data
Expand Down
30 changes: 0 additions & 30 deletions app/utils/download-anomalies.data.ts

This file was deleted.

177 changes: 69 additions & 108 deletions app/utils/download-anomalies.ts
Original file line number Diff line number Diff line change
@@ -1,129 +1,90 @@
import type { ChartTimeGranularity, EvolutionData } from '~/types/chart'
import { DOWNLOAD_ANOMALIES } from './download-anomalies.data'
import type { EvolutionData } from '~/types/chart'

export type DownloadAnomalyBound = {
date: string // YYYY-MM-DD
weeklyDownloads: number
}
/**
* Hampel filter for automatic anomaly detection and correction.
*
* For each data point, computes the median and Median Absolute Deviation (MAD)
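* of a surrounding window. Points deviating more than `threshold` scaled MADs
* (MAD × 1.4826) from the local median are flagged as anomalies and replaced
* with the rounded median. When the window's MAD is 0, any point that differs
* from the median is flagged.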
*
* This approach is unbiased — it applies the same statistical test to every
* package equally, with no manual curation.
*/

export type DownloadAnomaly = {
packageName: string
start: DownloadAnomalyBound
end: DownloadAnomalyBound
}
const DEFAULT_HALF_WINDOW = 3
const DEFAULT_THRESHOLD = 3

function getDateString(point: Record<string, any>, granularity: ChartTimeGranularity): string {
switch (granularity) {
case 'daily':
return point.day
case 'weekly':
return point.weekStart
case 'monthly':
return `${point.month}-01`
case 'yearly':
return `${point.year}-01-01`
}
/**
 * Returns the median of `values` without mutating the input array.
 * For an even number of values the two central values are averaged.
 */
function median(values: number[]): number {
  const ordered = values.slice().sort((x, y) => x - y)
  const count = ordered.length
  const upper = count >> 1
  if (count % 2 === 1) {
    return ordered[upper]!
  }
  return (ordered[upper - 1]! + ordered[upper]!) / 2
}

/**
* For daily the point date falls strictly between the anomaly bounds.
* For weekly the point date is the week start, and the full 7-day range is
* checked so any overlapping week is affected.
* For monthly/yearly the anomaly bounds are truncated to the same resolution
* so that any period overlapping the anomaly is caught (inclusive).
*/
function isDateAffected(
date: string,
anomaly: DownloadAnomaly,
granularity: ChartTimeGranularity,
): boolean {
switch (granularity) {
case 'daily':
return date > anomaly.start.date && date < anomaly.end.date
case 'weekly': {
const startWeek = date
const weekStartDate = new Date(`${date}T00:00:00Z`)
const weekEndDate = new Date(weekStartDate)
weekEndDate.setUTCDate(weekEndDate.getUTCDate() + 6)
const endWeek = weekEndDate.toISOString().slice(0, 10)
return startWeek <= anomaly.end.date && endWeek >= anomaly.start.date
}
case 'monthly': {
const startMonth = anomaly.start.date.slice(0, 7) + '-01'
const endMonth = anomaly.end.date.slice(0, 7) + '-01'
return date >= startMonth && date <= endMonth
}
case 'yearly': {
const startYear = anomaly.start.date.slice(0, 4) + '-01-01'
const endYear = anomaly.end.date.slice(0, 4) + '-01-01'
return date >= startYear && date <= endYear
}
}
/**
 * Median Absolute Deviation: the median of every value's absolute
 * distance from `med`.
 */
function mad(values: number[], med: number): number {
  return median(values.map(sample => Math.abs(sample - med)))
}

/**
 * Rescales a weekly download count to one period of `granularity`.
 *
 * The weekly value is returned untouched for `'weekly'`; every other
 * granularity goes through a per-day figure (weekly / 7) multiplied by
 * 1, 30 or 365 days and rounded to the nearest integer.
 */
function scaleWeeklyValue(weeklyValue: number, granularity: ChartTimeGranularity): number {
  const perDay = weeklyValue / 7
  switch (granularity) {
    case 'weekly':
      return weeklyValue
    case 'daily':
      return Math.round(perDay)
    case 'monthly':
      return Math.round(perDay * 30)
    case 'yearly':
      return Math.round(perDay * 365)
  }
}
export function applyHampelCorrection(
data: EvolutionData,
opts?: { halfWindow?: number; threshold?: number },
): EvolutionData {
// halfWindow controls how many neighbors on each side to consider.
// A halfWindow of 3 means we look at up to 7 points (3 left + current + 3 right);
// the window is clamped at the ends of the series, so edge points see fewer neighbors.
const halfWindow = opts?.halfWindow ?? DEFAULT_HALF_WINDOW
Comment on lines +32 to +34
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Do not score boundary samples with truncated windows.

Line 33 says a halfWindow of 3 uses 3 neighbours on each side, but Lines 50-52 clamp the first and last samples to shorter windows after Line 42 only validates the overall series length. That makes edge points easy false positives: 100,100,100,100,100,100,200 gets its last point flattened back to 100 because there is no right-hand context and windowMad falls to 0. Skip indices that cannot form a full symmetric window, or handle boundaries explicitly.

Suggested fix
-  for (let i = 0; i < values.length; i++) {
-    // Build a sliding window around the current point, clamped to array bounds.
-    const start = Math.max(0, i - halfWindow)
-    const end = Math.min(values.length - 1, i + halfWindow)
+  for (let i = halfWindow; i < values.length - halfWindow; i++) {
+    // Only evaluate points that have a full symmetric window.
+    const start = i - halfWindow
+    const end = i + halfWindow
     const window = values.slice(start, end + 1)

Also applies to: 41-42, 48-52


/**
 * Collects the entries of `DOWNLOAD_ANOMALIES` that belong to any of the
 * given packages, flattening each bound down to its date string.
 *
 * @param packageNames - Package names to look up.
 * @returns One `{ packageName, start, end }` record per matching entry, in
 *   the order the entries appear in `DOWNLOAD_ANOMALIES`.
 */
export function getAnomaliesForPackages(
  packageNames: string[],
): { packageName: string; start: string; end: string }[] {
  const matches: { packageName: string; start: string; end: string }[] = []
  for (const anomaly of DOWNLOAD_ANOMALIES) {
    if (!packageNames.includes(anomaly.packageName)) continue
    matches.push({
      packageName: anomaly.packageName,
      start: anomaly.start.date,
      end: anomaly.end.date,
    })
  }
  return matches
}
// threshold controls sensitivity. A value of 3 means a point must deviate
// more than 3 scaled MADs from the local median to be flagged.
// Higher = less sensitive, lower = more aggressive filtering.
const threshold = opts?.threshold ?? DEFAULT_THRESHOLD

export function applyBlocklistCorrection(opts: {
data: EvolutionData
packageName: string
granularity: ChartTimeGranularity
}): EvolutionData {
const { data, packageName, granularity } = opts
const anomalies = DOWNLOAD_ANOMALIES.filter(a => a.packageName === packageName)
if (!anomalies.length) return data
// Not enough data to form a full window — return as-is.
if (data.length < halfWindow * 2 + 1) return data

// Clone to avoid mutation
const values = (data as Array<{ value: number }>).map(d => d.value)
// Clone to avoid mutating the original data.
const result = (data as Array<Record<string, any>>).map(d => ({ ...d }))

for (const anomaly of anomalies) {
// Find indices of affected points
const affectedIndices: number[] = []
for (let i = 0; i < result.length; i++) {
const date = getDateString(result[i]!, granularity)
if (isDateAffected(date, anomaly, granularity)) {
affectedIndices.push(i)
}
}
for (let i = 0; i < values.length; i++) {
// Build a sliding window around the current point, clamped to array bounds.
const start = Math.max(0, i - halfWindow)
const end = Math.min(values.length - 1, i + halfWindow)
const window = values.slice(start, end + 1)

if (!affectedIndices.length) continue
// The median is robust to outliers — unlike the mean, a single spike
// won't pull it away from the true central tendency.
const windowMedian = median(window)

const firstAffected = affectedIndices[0]!
const lastAffected = affectedIndices[affectedIndices.length - 1]!
// MAD (Median Absolute Deviation) measures spread without being
// influenced by the outliers we're trying to detect.
const windowMad = mad(window, windowMedian)

// Use neighbors when available, fall back to scaled weeklyDownloads
const scaledStart = scaleWeeklyValue(anomaly.start.weeklyDownloads, granularity)
const scaledEnd = scaleWeeklyValue(anomaly.end.weeklyDownloads, granularity)
// How far this point is from the local median.
const deviation = Math.abs(values[i]! - windowMedian)

const startVal = firstAffected > 0 ? result[firstAffected - 1]!.value : scaledStart
const endVal = lastAffected < result.length - 1 ? result[lastAffected + 1]!.value : scaledEnd
// MAD of 0 means most values in the window are identical.
// If this point differs from the median at all, it's an outlier.
if (windowMad === 0) {
if (deviation > 0) {
result[i]!.value = Math.round(windowMedian)
result[i]!.hasAnomaly = true
}
continue
Comment on lines +65 to +72
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

The zero-MAD branch will erase real low-volume traffic.

When windowMad === 0, Line 68 treats any deviation from the local median as an anomaly. For sparse packages, a legitimate series like 0,0,0,1,0,0,0 is rewritten to all zeros on Line 69, which drops the only real activity in that period. Please gate this path behind an absolute/relative floor, or leave zero-MAD windows untouched.

}

const count = affectedIndices.length
for (let i = 0; i < count; i++) {
const t = (i + 1) / (count + 1)
result[affectedIndices[i]!]!.value = Math.round(startVal + t * (endVal - startVal))
result[affectedIndices[i]!]!.hasAnomaly = true
// Scale MAD to approximate standard deviation using the consistency
// constant 1.4826 (valid for normally distributed data).
// The resulting score is essentially "how many standard deviations
// away from the local median is this point?"
const score = deviation / (windowMad * 1.4826)

// If the score exceeds the threshold, replace with the median.
// This corrects the spike while preserving the surrounding trend.
if (score > threshold) {
result[i]!.value = Math.round(windowMedian)
result[i]!.hasAnomaly = true
}
}

return result as EvolutionData
}
5 changes: 0 additions & 5 deletions i18n/locales/bg-BG.json
Original file line number Diff line number Diff line change
Expand Up @@ -415,11 +415,6 @@
"smoothing": "Изглаждане",
"known_anomalies": "Известни аномалии",
"known_anomalies_description": "Интерполира известни скокове в изтеглянията, причинени от ботове или CI проблеми.",
"known_anomalies_ranges": "Диапазони на аномалии",
"known_anomalies_range": "От {start} до {end}",
"known_anomalies_range_named": "{packageName}: от {start} до {end}",
"known_anomalies_none": "Няма известни аномалии за този пакет. | Няма известни аномалии за тези пакети.",
"known_anomalies_contribute": "Допринесете данни за аномалии",
"apply_correction": "Прилагане на корекция",
"copy_alt": {
"trend_none": "предимно стабилна",
Expand Down
5 changes: 0 additions & 5 deletions i18n/locales/cs-CZ.json
Original file line number Diff line number Diff line change
Expand Up @@ -459,11 +459,6 @@
"smoothing": "Vyhlazování",
"known_anomalies": "Známé anomálie",
"known_anomalies_description": "Interpoluje přes známé špičky stažení způsobené boty nebo problémy s CI.",
"known_anomalies_ranges": "Rozsahy anomálií",
"known_anomalies_range": "Od {start} do {end}",
"known_anomalies_range_named": "{packageName}: od {start} do {end}",
"known_anomalies_none": "Žádné známé anomálie pro tento balíček. | Žádné známé anomálie pro tyto balíčky.",
"known_anomalies_contribute": "Přispět daty o anomáliích",
"apply_correction": "Aplikovat korekci",
"copy_alt": {
"trend_none": "převážně stabilní",
Expand Down
Loading
Loading