-
-
Notifications
You must be signed in to change notification settings - Fork 326
fix: replace manual anomalies with a hampel filter #1997
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,129 +1,90 @@ | ||
| import type { ChartTimeGranularity, EvolutionData } from '~/types/chart' | ||
| import { DOWNLOAD_ANOMALIES } from './download-anomalies.data' | ||
| import type { EvolutionData } from '~/types/chart' | ||
|
|
||
| export type DownloadAnomalyBound = { | ||
| date: string // YYYY-MM-DD | ||
| weeklyDownloads: number | ||
| } | ||
| /** | ||
| * Hampel filter for automatic anomaly detection and correction. | ||
| * | ||
| * For each data point, computes the median and Median Absolute Deviation (MAD) | ||
| * of a surrounding window. Points deviating more than `threshold` MADs from | ||
| * the local median are flagged as anomalies and replaced with the median. | ||
| * | ||
| * This approach is unbiased — it applies the same statistical test to every | ||
| * package equally, with no manual curation. | ||
| */ | ||
|
|
||
| export type DownloadAnomaly = { | ||
| packageName: string | ||
| start: DownloadAnomalyBound | ||
| end: DownloadAnomalyBound | ||
| } | ||
| const DEFAULT_HALF_WINDOW = 3 | ||
| const DEFAULT_THRESHOLD = 3 | ||
|
|
||
| function getDateString(point: Record<string, any>, granularity: ChartTimeGranularity): string { | ||
| switch (granularity) { | ||
| case 'daily': | ||
| return point.day | ||
| case 'weekly': | ||
| return point.weekStart | ||
| case 'monthly': | ||
| return `${point.month}-01` | ||
| case 'yearly': | ||
| return `${point.year}-01-01` | ||
| } | ||
| function median(values: number[]): number { | ||
| const sorted = [...values].sort((a, b) => a - b) | ||
| const mid = Math.floor(sorted.length / 2) | ||
| return sorted.length % 2 !== 0 ? sorted[mid]! : (sorted[mid - 1]! + sorted[mid]!) / 2 | ||
| } | ||
|
|
||
| /** | ||
| * For daily the point date falls strictly between the anomaly bounds. | ||
| * For weekly the point date is the week start, and the full 7-day range is | ||
| * checked so any overlapping week is affected. | ||
| * For monthly/yearly the anomaly bounds are truncated to the same resolution | ||
| * so that any period overlapping the anomaly is caught (inclusive). | ||
| */ | ||
| function isDateAffected( | ||
| date: string, | ||
| anomaly: DownloadAnomaly, | ||
| granularity: ChartTimeGranularity, | ||
| ): boolean { | ||
| switch (granularity) { | ||
| case 'daily': | ||
| return date > anomaly.start.date && date < anomaly.end.date | ||
| case 'weekly': { | ||
| const startWeek = date | ||
| const weekStartDate = new Date(`${date}T00:00:00Z`) | ||
| const weekEndDate = new Date(weekStartDate) | ||
| weekEndDate.setUTCDate(weekEndDate.getUTCDate() + 6) | ||
| const endWeek = weekEndDate.toISOString().slice(0, 10) | ||
| return startWeek <= anomaly.end.date && endWeek >= anomaly.start.date | ||
| } | ||
| case 'monthly': { | ||
| const startMonth = anomaly.start.date.slice(0, 7) + '-01' | ||
| const endMonth = anomaly.end.date.slice(0, 7) + '-01' | ||
| return date >= startMonth && date <= endMonth | ||
| } | ||
| case 'yearly': { | ||
| const startYear = anomaly.start.date.slice(0, 4) + '-01-01' | ||
| const endYear = anomaly.end.date.slice(0, 4) + '-01-01' | ||
| return date >= startYear && date <= endYear | ||
| } | ||
| } | ||
| function mad(values: number[], med: number): number { | ||
| const deviations = values.map(v => Math.abs(v - med)) | ||
| return median(deviations) | ||
| } | ||
|
|
||
| function scaleWeeklyValue(weeklyValue: number, granularity: ChartTimeGranularity): number { | ||
| switch (granularity) { | ||
| case 'daily': | ||
| return Math.round(weeklyValue / 7) | ||
| case 'weekly': | ||
| return weeklyValue | ||
| case 'monthly': | ||
| return Math.round((weeklyValue / 7) * 30) | ||
| case 'yearly': | ||
| return Math.round((weeklyValue / 7) * 365) | ||
| } | ||
| } | ||
| export function applyHampelCorrection( | ||
| data: EvolutionData, | ||
| opts?: { halfWindow?: number; threshold?: number }, | ||
| ): EvolutionData { | ||
| // halfWindow controls how many neighbors on each side to consider. | ||
| // A window of 3 means we look at 7 points total (3 left + current + 3 right). | ||
| const halfWindow = opts?.halfWindow ?? DEFAULT_HALF_WINDOW | ||
|
|
||
| export function getAnomaliesForPackages( | ||
| packageNames: string[], | ||
| ): { packageName: string; start: string; end: string }[] { | ||
| return DOWNLOAD_ANOMALIES.filter(a => packageNames.includes(a.packageName)).map(a => ({ | ||
| packageName: a.packageName, | ||
| start: a.start.date, | ||
| end: a.end.date, | ||
| })) | ||
| } | ||
| // threshold controls sensitivity. A value of 3 means a point must deviate | ||
| // more than 3 scaled MADs from the local median to be flagged. | ||
| // Higher = less sensitive, lower = more aggressive filtering. | ||
| const threshold = opts?.threshold ?? DEFAULT_THRESHOLD | ||
|
|
||
| export function applyBlocklistCorrection(opts: { | ||
| data: EvolutionData | ||
| packageName: string | ||
| granularity: ChartTimeGranularity | ||
| }): EvolutionData { | ||
| const { data, packageName, granularity } = opts | ||
| const anomalies = DOWNLOAD_ANOMALIES.filter(a => a.packageName === packageName) | ||
| if (!anomalies.length) return data | ||
| // Not enough data to form a full window — return as-is. | ||
| if (data.length < halfWindow * 2 + 1) return data | ||
|
|
||
| // Clone to avoid mutation | ||
| const values = (data as Array<{ value: number }>).map(d => d.value) | ||
| // Clone to avoid mutating the original data. | ||
| const result = (data as Array<Record<string, any>>).map(d => ({ ...d })) | ||
|
|
||
| for (const anomaly of anomalies) { | ||
| // Find indices of affected points | ||
| const affectedIndices: number[] = [] | ||
| for (let i = 0; i < result.length; i++) { | ||
| const date = getDateString(result[i]!, granularity) | ||
| if (isDateAffected(date, anomaly, granularity)) { | ||
| affectedIndices.push(i) | ||
| } | ||
| } | ||
| for (let i = 0; i < values.length; i++) { | ||
| // Build a sliding window around the current point, clamped to array bounds. | ||
| const start = Math.max(0, i - halfWindow) | ||
| const end = Math.min(values.length - 1, i + halfWindow) | ||
| const window = values.slice(start, end + 1) | ||
|
|
||
| if (!affectedIndices.length) continue | ||
| // The median is robust to outliers — unlike the mean, a single spike | ||
| // won't pull it away from the true central tendency. | ||
| const windowMedian = median(window) | ||
|
|
||
| const firstAffected = affectedIndices[0]! | ||
| const lastAffected = affectedIndices[affectedIndices.length - 1]! | ||
| // MAD (Median Absolute Deviation) measures spread without being | ||
| // influenced by the outliers we're trying to detect. | ||
| const windowMad = mad(window, windowMedian) | ||
|
|
||
| // Use neighbors when available, fall back to scaled weeklyDownloads | ||
| const scaledStart = scaleWeeklyValue(anomaly.start.weeklyDownloads, granularity) | ||
| const scaledEnd = scaleWeeklyValue(anomaly.end.weeklyDownloads, granularity) | ||
| // How far this point is from the local median. | ||
| const deviation = Math.abs(values[i]! - windowMedian) | ||
|
|
||
| const startVal = firstAffected > 0 ? result[firstAffected - 1]!.value : scaledStart | ||
| const endVal = lastAffected < result.length - 1 ? result[lastAffected + 1]!.value : scaledEnd | ||
| // MAD of 0 means most values in the window are identical. | ||
| // If this point differs from the median at all, it's an outlier. | ||
| if (windowMad === 0) { | ||
| if (deviation > 0) { | ||
| result[i]!.value = Math.round(windowMedian) | ||
| result[i]!.hasAnomaly = true | ||
| } | ||
| continue | ||
|
Comment on lines +65 to +72
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The zero-MAD branch will erase real low-volume traffic. When |
||
| } | ||
|
|
||
| const count = affectedIndices.length | ||
| for (let i = 0; i < count; i++) { | ||
| const t = (i + 1) / (count + 1) | ||
| result[affectedIndices[i]!]!.value = Math.round(startVal + t * (endVal - startVal)) | ||
| result[affectedIndices[i]!]!.hasAnomaly = true | ||
| // Scale MAD to approximate standard deviation using the consistency | ||
| // constant 1.4826 (valid for normally distributed data). | ||
| // The resulting score is essentially "how many standard deviations | ||
| // away from the local median is this point?" | ||
| const score = deviation / (windowMad * 1.4826) | ||
|
|
||
| // If the score exceeds the threshold, replace with the median. | ||
| // This corrects the spike while preserving the surrounding trend. | ||
| if (score > threshold) { | ||
| result[i]!.value = Math.round(windowMedian) | ||
| result[i]!.hasAnomaly = true | ||
| } | ||
| } | ||
|
|
||
| return result as EvolutionData | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do not score boundary samples with truncated windows.
Line 33 says a `halfWindow` of `3` means 3 neighbours are considered on each side, but lines 50–52 clamp the first and last samples to shorter windows, and line 42 only validates the overall series length. That makes edge points easy false positives: `100, 100, 100, 100, 100, 100, 200` gets its last point flattened back to `100`, because there is no right-hand context and `windowMad` falls to `0`. Skip indices that cannot form a full symmetric window, or handle boundaries explicitly. Suggested fix
Also applies to: 41-42, 48-52