From af0b9217ed36381ccd35c32428c1189790c42b23 Mon Sep 17 00:00:00 2001
From: Dong Nguyen <ndaidong@gmail.com>
Date: Sun, 3 May 2026 14:10:04 +0700
Subject: [PATCH 1/4] v7.2.0

- Add AI Agents guides
- Fix some minor issues
- Update packages
- Correct type definitions
- Update CI config
---
 .aiignore                             |  15 +++
 .github/workflows/ci-test.yml         |  12 +--
 .github/workflows/codeql-analysis.yml |   2 +-
 .gitignore                            |   8 +-
 .npmignore                            |   3 +
 AGENTS.md                             | 117 ++++++++++++++++++++++
 README.md                             |  16 ++-
 index.d.ts                            | 139 ++++++++++++++++++--------
 package.json                          |  23 ++---
 src/deno/cross-fetch.js               |   2 -
 src/main.js                           |  50 ++++++++-
 src/utils/linker.js                   |  20 ++++
 src/utils/normalizer.js               |  75 ++++++++++++++
 src/utils/parseAtomFeed.js            |  35 ++++++-
 src/utils/parseJsonFeed.js            |  28 +++++-
 src/utils/parseRdfFeed.js             |  32 ++++++
 src/utils/parseRssFeed.js             |  36 ++++++-
 src/utils/retrieve.js                 |  64 +++++++++---
 src/utils/xmlparser.js                |  33 ++++++
 19 files changed, 619 insertions(+), 91 deletions(-)
 create mode 100644 .aiignore
 create mode 100644 AGENTS.md
 delete mode 100644 src/deno/cross-fetch.js

diff --git a/.aiignore b/.aiignore
new file mode 100644
index 0000000..c880c25
--- /dev/null
+++ b/.aiignore
@@ -0,0 +1,15 @@
+node_modules
+coverage
+coverage.lcov
+
+package-lock.json
+pnpm-lock.yaml
+bun.lock
+
+.env
+
+dist
+storage
+
+# AI Session Files (Private Context)
+.sessions
diff --git a/.github/workflows/ci-test.yml b/.github/workflows/ci-test.yml
index 7ea3fd9..29c8f67 100755
--- a/.github/workflows/ci-test.yml
+++ b/.github/workflows/ci-test.yml
@@ -12,27 +12,25 @@ jobs:
 
     strategy:
       matrix:
-        node_version: [20.x, 22.x, 24.x]
+        node_version: [22.x, 24.x, 25.x]
 
     steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@v6
 
     - name: setup Node.js v${{ matrix.node_version }}
-      uses: actions/setup-node@v4
+      uses: actions/setup-node@v6
       with:
         node-version: ${{ matrix.node_version }}
 
     - name: run npm scripts
-      env:
-        PROXY_SERVER: ${{ secrets.PROXY_SERVER }}
       run: |
         npm install
         npm run lint
-        #npm run build --if-present
+        npm run build --if-present
         npm run test
 
     - name: cache node modules
-      uses: actions/cache@v4
+      uses: actions/cache@v5
       with:
         path: ~/.npm
         key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
index a77d776..5547051 100644
--- a/.github/workflows/codeql-analysis.yml
+++ b/.github/workflows/codeql-analysis.yml
@@ -38,7 +38,7 @@ jobs:
 
     steps:
     - name: Checkout repository
-      uses: actions/checkout@v4
+      uses: actions/checkout@v6
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
diff --git a/.gitignore b/.gitignore
index 8b34578..d417ce1 100755
--- a/.gitignore
+++ b/.gitignore
@@ -12,12 +12,14 @@ coverage
 .nyc_output
 
 yarn.lock
-coverage.lcov
-package-lock.json
 pnpm-lock.yaml
+package-lock.json
+deno.lock
+bun.lock
 
 output.json
-deno.lock
 
 bundle.cjs
 bundle.cjs.map
+
+.sessions
diff --git a/.npmignore b/.npmignore
index 2dfcba1..6292f33 100644
--- a/.npmignore
+++ b/.npmignore
@@ -4,3 +4,6 @@ coverage
 pnpm-lock.yaml
 examples
 test-data
+
+.aiignore
+.sessions
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..d3bba62
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,117 @@
+# AI Agent Instructions
+
+Coding guidelines for AI agents working in this project.
+
+## Philosophy
+
+- Minimalism. Simple is better. KISS (Keep It Simple, Stupid).
+- Clean code, easy to read, easy to delete.
+- Functional Programming — pure functions, immutability, no side effects.
+- MVP mindset — deliver the smallest thing that works, then iterate.
+
+## Security Rules (CRITICAL — no exceptions)
+
+- NEVER output or request .env and example.env file contents
+- NEVER hardcode API credentials, secret tokens, private keys or passwords in source code
+- NEVER send sensitive data to external AI services
+- Follow `.aiignore` and `.gitignore` for excluded files — do not read or reference them
+- When asking for help, sanitize data (replace real IDs, emails, tokens with placeholders)
+- Do not log sensitive information
+
+## Coding Standards (Strict)
+- Language: JavaScript (ESM syntax). No TypeScript.
+- Style: No semicolons, single quotes, 2-space indentation.
+- Respect `eslint.config.js` — do not suggest rule changes
+- Patterns:
+  - Functional Programming only. No Classes or OOP.
+  - Arrow functions are preferred.
+  - Maximum 3 parameters per function. Use objects for more.
+- Naming: camelCase for variables/functions, UPPER_SNAKE_CASE for constants.
+- Documentation:
+  - Add JSDocs before all functions and exported variables.
+  - Language: Use American English for all comments and JSDocs.
+  - Constraint: NEVER use Vietnamese or other languages in the source code.
+
+### Error Handling
+
+- Handle errors explicitly — never swallow silently
+- Use try/catch with proper logging
+- Return null or throw meaningful errors
+
+```javascript
+export const send = async (params) => {
+  try {
+    const response = await ai.ask(params)
+    logger.info(`send() -> success: ${response.id}`)
+    return response
+  } catch (err) {
+    logger.error(`send() -> failed: ${err.message}`)
+    console.error(err)
+    return null
+  }
+}
+```
+
+## Testing Standards
+
+- Write tests for critical business logic and all error cases
+- Use simple test runners (node:test, bun:test, vitest)
+- No complex mocking frameworks unless necessary
+- Tests live alongside source: `[module].test.js` next to `[module].js`
+
+## Dependency Rules
+
+- Prefer built-in APIs over external packages
+- Before adding a dependency, explain:
+  - Why it is needed
+  - Alternatives considered
+  - Bundle size impact
+- Never add a dependency for trivial utilities
+- Avoid packages with large dependency trees
+
+## Architecture Rules
+
+- Do NOT change existing project architecture without explicit approval
+- Do NOT move or rename core modules unless requested
+- Respect module boundaries
+- Avoid cross-module coupling
+- New modules must follow existing folder structure
+
+## When Making Changes
+
+1. Read existing patterns first
+2. Follow current coding style strictly
+3. Keep dependencies minimal
+4. Handle errors explicitly
+5. Add JSDoc comments for new functions
+6. Run `npm run lint` before committing
+7. Do NOT refactor unrelated code
+8. Do NOT modify working code outside task scope
+9. Prefer minimal diff changes
+10. Preserve existing behavior unless explicitly requested
+
+## When in Doubt
+
+- Ask for clarification before generating code
+- State your assumption explicitly if proceeding without confirmation
+- Prefer doing less and asking over doing more and guessing
+
+## Git Workflow
+
+- Work only inside the current branch
+- Do NOT create or delete branches
+- Do NOT rewrite git history
+- Do NOT modify commit messages
+- Changes must correspond to the current issue
+
+## Agent References
+
+Reference these URLs when working on related topics:
+
+- Bun: https://bun.sh/llms-full.txt
+- RSS Feed: https://www.rssboard.org/rss-specification
+- RDF Feed: https://web.resource.org/rss/1.0/spec
+- ATOM Feed: https://datatracker.ietf.org/doc/html/rfc4287
+- JSON Feed: https://www.jsonfeed.org/version/1.1/
+
+---
diff --git a/README.md b/README.md
index 47ef510..071f864 100755
--- a/README.md
+++ b/README.md
@@ -10,17 +10,23 @@ To read & normalize RSS/ATOM/JSON feed data.
 
 ## Demo
 
-- [Give it a try!](https://extractus-demo.vercel.app/feed)
+- [Give it a try!](https://extractus.pwshub.com/feed)
 
 
 ## Install
 
 ```bash
-# npm, pnpm, yarn
-npm i @extractus/feed-extractor
-
 # bun
-bun add @extractus/feed-extractor
+bun add @extractus/feed-extractor
+
+# npm
+npm i @extractus/feed-extractor
+
+# pnpm
+pnpm install @extractus/feed-extractor
+
+# yarn
+yarn add @extractus/feed-extractor
 ```
 
 ## Usage
diff --git a/index.d.ts b/index.d.ts
index 60704c3..b96611d 100755
--- a/index.d.ts
+++ b/index.d.ts
@@ -1,95 +1,154 @@
 // Type definitions
 
+/**
+ * A single normalized feed entry.
+ */
 export interface FeedEntry {
-  /**
-   * id, guid, or generated identifier for the entry
-   */
+  /** Entry identifier (guid, id, or auto-generated) */
   id: string;
+  /** Permalink to the entry */
   link?: string;
+  /** Entry title */
   title?: string;
+  /** Entry description (HTML stripped, optionally truncated) */
   description?: string;
+  /** Publication date (ISO format or original) */
   published?: string;
 }
 
+/**
+ * Normalized feed data returned by all extract functions.
+ *
+ * Extra fields may be present if `getExtraFeedFields` or `getExtraEntryFields` are used.
+ */
 export interface FeedData {
-  link?: string;
+  /** Feed title */
   title?: string;
+  /** Feed link */
+  link?: string;
+  /** Feed description */
   description?: string;
+  /** Feed generator */
   generator?: string;
+  /** Feed language */
   language?: string;
+  /** Feed publication date */
   published?: string;
+  /** List of feed entries */
   entries?: Array<FeedEntry>;
 }
 
+/**
+ * Configuration for proxy-based feed fetching.
+ */
 export interface ProxyConfig {
+  /** Proxy endpoint URL; the target feed URL is appended as query param */
   target?: string;
-  headers?: any;
+  /** Custom headers to send to the proxy */
+  headers?: Record<string, string>;
 }
 
+/**
+ * Options for feed parsing and normalization.
+ */
 export interface ReaderOptions {
   /**
-   * normalize feed data or keep original
-   * default: true
+   * Normalize feed data or keep original structure.
+   * @default true
    */
   normalization?: boolean;
   /**
-   * convert datetime to ISO format
-   * default: true
+   * Convert dates to ISO 8601 format.
+   * @default true
    */
   useISODateFormat?: boolean;
   /**
-   * to truncate description
-   * default: 210
+   * Maximum length for entry descriptions (0 = no limit).
+   * @default 250
    */
   descriptionMaxLen?: number;
   /**
-   * fast-xml-parser options
-   * https://github.com/NaturalIntelligence/fast-xml-parser/blob/master/docs/v4/2.XMLparseOptions.md
+   * Options passed directly to fast-xml-parser.
+   * @see https://github.com/NaturalIntelligence/fast-xml-parser/blob/master/docs/v4/2.XMLparseOptions.md
    */
-  xmlParserOptions?: any;
+  xmlParserOptions?: Record<string, unknown>;
   /**
-   * fill in the baseurl when it does not exist in the link
-   * default: ''
+   * Base URL for resolving relative links in the feed.
+   * @default ''
    */
   baseUrl?: string;
   /**
-   * merge extra feed fields in result
+   * Callback to extract extra fields from the raw feed data.
+   * Returned properties are merged into the top-level result.
    */
-  getExtraFeedFields?: (feedData: object) => object;
+  getExtraFeedFields?: (feedData: Record<string, unknown>) => Record<string, unknown>;
   /**
-   * merge extra entry fields in result
+   * Callback to extract extra fields from each raw entry.
+   * Returned properties are merged into each entry in the result.
    */
-  getExtraEntryFields?: (entryData: object) => object;
+  getExtraEntryFields?: (entryData: Record<string, unknown>) => Record<string, unknown>;
 }
 
+/**
+ * Options for the HTTP fetch request when using `extract()`.
+ *
+ * Only `headers`, `proxy`, `agent`, and `signal` are used by the library.
+ * Other standard fetch options may be passed through to `fetch()` in non-proxy mode.
+ */
 export interface FetchOptions {
-  //  Definitions by: Ryan Graham <https://github.com/ryan-codingintrigue>
-  method?: "GET" | "POST" | "DELETE" | "PATCH" | "PUT" | "HEAD" | "OPTIONS" | "CONNECT";
-  headers?: any;
-  body?: any;
-  mode?: "cors" | "no-cors" | "same-origin";
-  credentials?: "omit" | "same-origin" | "include";
-  cache?: "default" | "no-store" | "reload" | "no-cache" | "force-cache" | "only-if-cached";
-  redirect?: "follow" | "error" | "manual";
-  referrer?: string;
-  referrerPolicy?: "referrer" | "no-referrer-when-downgrade" | "origin" | "origin-when-cross-origin" | "unsafe-url";
-  integrity?: any;
+  /** Request headers (e.g. User-Agent) */
+  headers?: Record<string, string>;
+  /** Proxy configuration to route the request through an intermediary */
   proxy?: ProxyConfig;
-  /**
-   * http proxy agent
-   * default: null
-   */
+  /** HTTP/HTTPS proxy agent (e.g. HttpsProxyAgent) */
   agent?: object;
-  /**
-   * signal to terminate request
-   * default: null
-   */
+  /** AbortSignal to cancel the request (e.g. AbortSignal.timeout()) */
   signal?: object;
 }
 
+/**
+ * Parse an XML string into normalized feed data.
+ *
+ * Automatically detects RSS 2.0, Atom, and RDF/RSS 1.0 formats.
+ *
+ * @param xml - XML feed string
+ * @param options - Parser options
+ * @returns Normalized feed data
+ */
 export function extractFromXml(xml: string, options?: ReaderOptions): FeedData;
-export function extractFromJson(json: string, options?: ReaderOptions): FeedData;
 
+/**
+ * Parse a JSON Feed object (or JSON string) into normalized feed data.
+ *
+ * Accepts either a parsed JavaScript object or a JSON string.
+ *
+ * @param json - JSON Feed object or JSON string
+ * @param options - Parser options
+ * @returns Normalized feed data
+ */
+export function extractFromJson(json: Record<string, unknown> | string, options?: ReaderOptions): FeedData;
+
+/**
+ * Fetch and parse a feed from a URL.
+ *
+ * Supports RSS, Atom, RDF, and JSON Feed formats.
+ * Content type is auto-detected from the HTTP response.
+ *
+ * @param url - Feed source URL
+ * @param options - Parser options
+ * @param fetchOptions - HTTP fetch options
+ * @returns Promise resolving to normalized feed data
+ */
 export function extract(url: string, options?: ReaderOptions, fetchOptions?: FetchOptions): Promise<FeedData>;
 
+/**
+ * @deprecated Use `extract()` instead.
+ *
+ * Fetch and parse a feed from a URL.
+ *
+ * @param url - Feed source URL
+ * @param options - Parser options
+ * @param fetchOptions - HTTP fetch options
+ * @returns Promise resolving to normalized feed data
+ */
 export function read(url: string, options?: ReaderOptions, fetchOptions?: FetchOptions): Promise<FeedData>;
diff --git a/package.json b/package.json
index e1e5d95..23d8e16 100755
--- a/package.json
+++ b/package.json
@@ -1,5 +1,5 @@
 {
-  "version": "7.1.7",
+  "version": "7.2.0",
   "name": "@extractus/feed-extractor",
   "description": "To read and normalize RSS/ATOM/JSON feed data",
   "homepage": "https://github.com/extractus/feed-extractor",
@@ -18,12 +18,6 @@
       "default": "./src/main.js"
     }
   },
-  "imports": {
-    "cross-fetch": "./src/deno/cross-fetch.js"
-  },
-  "browser": {
-    "cross-fetch": "./src/deno/cross-fetch.js"
-  },
   "types": "./index.d.ts",
   "engines": {
     "node": ">= 20"
@@ -40,17 +34,16 @@
   },
   "dependencies": {
     "@ndaidong/bellajs": "^12.0.1",
-    "cross-fetch": "^4.1.0",
-    "fast-xml-parser": "^5.2.5",
+    "fast-xml-parser": "^5.7.2",
     "html-entities": "^2.6.0"
   },
   "devDependencies": {
-    "@eslint/js": "^9.34.0",
-    "esbuild": "^0.25.9",
-    "eslint": "^9.34.0",
-    "globals": "^16.3.0",
-    "https-proxy-agent": "^7.0.6",
-    "nock": "^14.0.10"
+    "@eslint/js": "^10.0.1",
+    "esbuild": "^0.28.0",
+    "eslint": "^10.3.0",
+    "globals": "^17.6.0",
+    "https-proxy-agent": "^9.0.0",
+    "nock": "^14.0.14"
   },
   "keywords": [
     "extractor",
diff --git a/src/deno/cross-fetch.js b/src/deno/cross-fetch.js
deleted file mode 100644
index d084f98..0000000
--- a/src/deno/cross-fetch.js
+++ /dev/null
@@ -1,2 +0,0 @@
-// cross-fetch.js
-export default fetch
diff --git a/src/main.js b/src/main.js
index e41ba9e..2fc11c7 100755
--- a/src/main.js
+++ b/src/main.js
@@ -9,6 +9,12 @@ import parseRssFeed from './utils/parseRssFeed.js'
 import parseAtomFeed from './utils/parseAtomFeed.js'
 import parseRdfFeed from './utils/parseRdfFeed.js'
 
+/**
+ * Normalize parser options with defaults.
+ *
+ * @param {Object} [options={}] - User-provided parser options
+ * @returns {Object} Normalized options with defaults applied
+ */
 const getopt = (options = {}) => {
   const {
     normalization = true,
@@ -31,10 +37,27 @@ const getopt = (options = {}) => {
   }
 }
 
+/**
+ * Extract feed data from a JSON string/object.
+ *
+ * @param {Object|string} json - JSON Feed data
+ * @param {Object} [options={}] - Parser options
+ * @returns {Object} Normalized feed data
+ */
 export const extractFromJson = (json, options = {}) => {
   return parseJsonFeed(json, getopt(options))
 }
 
+/**
+ * Extract feed data from an XML string.
+ *
+ * Automatically detects RSS 2.0, Atom, and RDF/RSS 1.0 formats.
+ *
+ * @param {string} xml - XML feed string
+ * @param {Object} [options={}] - Parser options
+ * @returns {Object} Normalized feed data
+ * @throws {Error} If XML is not well-formed or format is unrecognized
+ */
 export const extractFromXml = (xml, options = {}) => {
   if (!validate(xml)) {
     throw new Error('The XML document is not well-formed')
@@ -44,15 +67,31 @@ export const extractFromXml = (xml, options = {}) => {
 
   const data = xml2obj(xml, opts.xmlParserOptions)
 
-  return isRSS(data)
+  const result = isRSS(data)
     ? parseRssFeed(data, opts)
     : isAtom(data)
       ? parseAtomFeed(data, opts)
       : isRdf(data)
         ? parseRdfFeed(data, opts)
         : null
+  if (!result) {
+    throw new Error('Unrecognized feed format')
+  }
+  return result
 }
 
+/**
+ * Fetch and extract feed data from a URL.
+ *
+ * Supports RSS, Atom, RDF, and JSON Feed formats.
+ * Automatically detects content type and dispatches to the appropriate parser.
+ *
+ * @param {string} url - Feed source URL
+ * @param {Object} [options={}] - Parser options (normalization, date format, etc.)
+ * @param {Object} [fetchOptions={}] - Fetch options (headers, proxy, agent, signal)
+ * @returns {Promise<Object>} Normalized feed data
+ * @throws {Error} On invalid URL, fetch failure, or parse failure
+ */
 export const extract = async (url, options = {}, fetchOptions = {}) => {
   if (!isValidUrl(url)) {
     throw new Error('Input param must be a valid URL')
@@ -68,6 +107,15 @@ export const extract = async (url, options = {}, fetchOptions = {}) => {
   return type === 'json' ? extractFromJson(json, options) : extractFromXml(text, options)
 }
 
+/**
+ * Deprecated. Use {@link extract} instead.
+ *
+ * @param {string} url - Feed source URL
+ * @param {Object} [options] - Parser options
+ * @param {Object} [fetchOptions] - Fetch options
+ * @returns {Promise<Object>} Normalized feed data
+ * @deprecated Since v7.0. Use `extract()` instead
+ */
 export const read = async (url, options, fetchOptions) => {
   console.warn('WARNING: read() is deprecated. Please use extract() instead!')
   return extract(url, options, fetchOptions)
diff --git a/src/utils/linker.js b/src/utils/linker.js
index d86e47a..090a960 100755
--- a/src/utils/linker.js
+++ b/src/utils/linker.js
@@ -1,5 +1,11 @@
 // utils -> linker
 
+/**
+ * Check if a string is a valid HTTP/HTTPS URL.
+ *
+ * @param {string} url - URL string to validate
+ * @returns {boolean} True if the URL is valid and uses http/https protocol
+ */
 export const isValid = (url = '') => {
   try {
     const ourl = new URL(url)
@@ -9,6 +15,13 @@ export const isValid = (url = '') => {
   }
 }
 
+/**
+ * Resolve a relative URL against an absolute base URL.
+ *
+ * @param {string} fullUrl - Base absolute URL
+ * @param {string} relativeUrl - Relative URL to resolve
+ * @returns {string} Resolved absolute URL, or empty string on failure
+ */
 export const absolutify = (fullUrl = '', relativeUrl = '') => {
   try {
     const result = new URL(relativeUrl, fullUrl)
@@ -18,6 +31,7 @@ export const absolutify = (fullUrl = '', relativeUrl = '') => {
   }
 }
 
+/** @type {string[]} Known tracking query param keys to strip from URLs */
 const blacklistKeys = [
   'CNDID',
   '__twitter_impression',
@@ -78,6 +92,12 @@ const blacklistKeys = [
   'pk_campaign',
 ]
 
+/**
+ * Remove known tracking parameters and hash fragment from a URL.
+ *
+ * @param {string} url - URL to purify
+ * @returns {string|null} Purified URL string, or null on failure
+ */
 export const purify = (url) => {
   try {
     const pureUrl = new URL(url)
diff --git a/src/utils/normalizer.js b/src/utils/normalizer.js
index c5f11a7..5eff768 100644
--- a/src/utils/normalizer.js
+++ b/src/utils/normalizer.js
@@ -13,6 +13,12 @@ import { decode } from 'html-entities'
 
 import { absolutify, isValid as isValidUrl, purify as purifyUrl } from './linker.js'
 
+/**
+ * Convert a date string to ISO 8601 format.
+ *
+ * @param {string} dstr - Date string to convert
+ * @returns {string} ISO date string, or empty string on failure
+ */
 export const toISODateString = (dstr) => {
   try {
     return dstr ? (new Date(dstr)).toISOString() : ''
@@ -21,17 +27,41 @@ export const toISODateString = (dstr) => {
   }
 }
 
+/**
+ * Strip HTML tags and optionally truncate a description string.
+ *
+ * @param {string} val - Raw description value
+ * @param {number} [maxlen=0] - Maximum length (0 = no truncation)
+ * @returns {string} Cleaned and optionally truncated description
+ */
 export const buildDescription = (val, maxlen = 0) => {
   const stripped = stripTags(String(val).trim().replace(/^<!\[CDATA\[|\]\]>$/g, ''))
   const text = maxlen > 0 ? truncate(stripped, maxlen) : stripped
   return text.replace(/\n+/g, ' ')
 }
 
+/**
+ * Extract text content from a parsed XML node.
+ *
+ * Handles multiple known property shapes: `_text`, `#text`, `_cdata`, `$t`.
+ *
+ * @param {*} val - Value to extract text from
+ * @returns {string} Decoded and trimmed text content
+ */
 export const getText = (val) => {
   const txt = isObject(val) ? (val._text || val['#text'] || val._cdata || val.$t) : val
   return txt ? decode(String(txt).trim()) : ''
 }
 
+/**
+ * Extract a URL link from a parsed XML node.
+ *
+ * Supports multiple link formats: string, `href`, `@_href`, `@_url`, `_attributes.href`.
+ *
+ * @param {*} val - Link value (string, object, or array)
+ * @param {string|Object} [id=''] - GUID object or string for fallback URL
+ * @returns {string} Extracted URL string
+ */
 export const getLink = (val = [], id = '') => {
   if (isObject(id) && hasProperty(id, '@_isPermaLink') && id['@_isPermaLink'] === 'true') {
     return getText(id)
@@ -57,6 +87,16 @@ export const getLink = (val = [], id = '') => {
   return url ? url : isValidUrl(id) ? id : ''
 }
 
+/**
+ * Extract a purified absolute URL from feed entry data.
+ *
+ * Will strip tracking params via `purify` and resolve relative URLs via `absolutify`.
+ *
+ * @param {*} url - Link value from feed entry
+ * @param {string} [id=''] - Fallback identifier URL
+ * @param {string} [baseUrl=''] - Base URL for resolving relative links
+ * @returns {string} Purified absolute URL string
+ */
 export const getPureUrl = (url, id = '', baseUrl) => {
   const link = getLink(url, id)
   const pu = purifyUrl(link)
@@ -68,12 +108,34 @@ export const getPureUrl = (url, id = '', baseUrl) => {
     : ''
 }
 
+/**
+ * Generate a consistent hash from a string.
+ *
+ * @param {string} str - Input string
+ * @returns {string} Base-36 encoded hash
+ */
 const hash = (str) => Math.abs(str.split('').reduce((s, c) => Math.imul(31, s) + c.charCodeAt(0) | 0, 0)).toString(36)
 
+/**
+ * Generate a stable entry ID from identifier, URL, and publication date.
+ *
+ * Falls back to a hash-based ID when no explicit identifier is available.
+ *
+ * @param {*} id - Entry identifier (guid/id) value
+ * @param {string} url - Entry URL
+ * @param {string} pubDate - Publication date string
+ * @returns {string} Resolved entry ID
+ */
 export const getEntryId = (id, url, pubDate) => {
   return id ? getText(id) : hash(getPureUrl(url)) + '-' + (new Date(pubDate)).getTime()
 }
 
+/**
+ * Extract enclosure metadata from a parsed XML node.
+ *
+ * @param {Object} val - Enclosure object with `@_url`, `@_type`, `@_length`
+ * @returns {Object|null} Enclosure object `{ url, type, length }`, or null
+ */
 export const getEnclosure = (val) => {
   const url = hasProperty(val, '@_url') ? val['@_url'] : ''
   const type = hasProperty(val, '@_type') ? val['@_type'] : ''
@@ -87,6 +149,12 @@ export const getEnclosure = (val) => {
     }
 }
 
+/**
+ * Build a category object from a parsed XML node.
+ *
+ * @param {*} v - Category value (string or object)
+ * @returns {Object|string} Category object `{ text, domain }` or raw string
+ */
 const getCategory = (v) => {
   return isObject(v)
     ? {
@@ -96,6 +164,13 @@ const getCategory = (v) => {
     : v
 }
 
+/**
+ * Normalize optional feed/entry tags (source, category, enclosure).
+ *
+ * @param {*} val - Raw tag value
+ * @param {string} key - Tag name
+ * @returns {*} Normalized tag value
+ */
 export const getOptionalTags = (val, key) => {
   if (key === 'source') {
     return {
diff --git a/src/utils/parseAtomFeed.js b/src/utils/parseAtomFeed.js
index 99435da..94df305 100644
--- a/src/utils/parseAtomFeed.js
+++ b/src/utils/parseAtomFeed.js
@@ -13,6 +13,13 @@ import {
   getEntryId
 } from './normalizer.js'
 
+/**
+ * Transform a single Atom entry into a normalized entry object.
+ *
+ * @param {Object} item - Raw Atom entry from parsed XML
+ * @param {Object} options - Parser options
+ * @returns {Object} Normalized entry with id, title, link, published, description
+ */
 const transform = (item, options) => {
   const {
     useISODateFormat,
@@ -51,6 +58,15 @@ const transform = (item, options) => {
   }
 }
 
+/**
+ * Flatten raw Atom feed data without normalization.
+ *
+ * Preserves original structure while cleaning text and links.
+ *
+ * @param {Object} feed - Raw Atom feed data
+ * @param {string} baseUrl - Base URL for resolving relative links
+ * @returns {Object} Feed data with cleaned entries
+ */
 const flatten = (feed, baseUrl) => {
   const {
     id,
@@ -91,6 +107,16 @@ const flatten = (feed, baseUrl) => {
   return output
 }
 
+/**
+ * Parse and normalize Atom feed data into a standard structure.
+ *
+ * When `normalization` is false, returns flattened raw data instead.
+ * Extracts language from `xml:lang` attribute when present.
+ *
+ * @param {Object} data - Parsed Atom XML object
+ * @param {Object} [options={}] - Parser options
+ * @returns {Object} Normalized feed object with entries array
+ */
 const parseAtom = (data, options = {}) => {
   const {
     normalization,
@@ -110,7 +136,7 @@ const parseAtom = (data, options = {}) => {
     link = '',
     subtitle = '',
     generator = '',
-    language = '',
+    language = feedData.language || feedData['@_xml:lang'] || '',
     updated = '',
     entry: item = [],
   } = feedData
@@ -135,6 +161,13 @@ const parseAtom = (data, options = {}) => {
   }
 }
 
+/**
+ * Parse Atom feed data from a parsed XML object.
+ *
+ * @param {Object} data - Parsed Atom XML object
+ * @param {Object} [options={}] - Parser options
+ * @returns {Object} Normalized or flattened feed data
+ */
 export default (data, options = {}) => {
   return parseAtom(data, options)
 }
diff --git a/src/utils/parseJsonFeed.js b/src/utils/parseJsonFeed.js
index d8258c7..1d6b063 100644
--- a/src/utils/parseJsonFeed.js
+++ b/src/utils/parseJsonFeed.js
@@ -13,6 +13,13 @@ import {
 
 import { absolutify, purify as purifyUrl } from './linker.js'
 
+/**
+ * Transform a single JSON Feed item into a normalized entry object.
+ *
+ * @param {Object} item - Raw JSON Feed item
+ * @param {Object} options - Parser options
+ * @returns {Object} Normalized entry with id, title, link, published, description
+ */
 const transform = (item, options) => {
   const {
     useISODateFormat,
@@ -48,6 +55,16 @@ const transform = (item, options) => {
   }
 }
 
+/**
+ * Parse and normalize JSON Feed data into a standard structure.
+ *
+ * When `normalization` is false, returns the raw data as-is.
+ * Extracts feed-level published date from `date_published` or `date_modified`.
+ *
+ * @param {Object} data - JSON Feed object
+ * @param {Object} options - Parser options
+ * @returns {Object} Normalized feed object with entries array
+ */
 const parseJson = (data, options) => {
   const {
     normalization,
@@ -62,6 +79,8 @@ const parseJson = (data, options) => {
   const {
     title = '',
     home_page_url: homepageUrl = '',
+    date_published: pubDate = '',
+    date_modified: modDate = '',
     description = '',
     language = '',
     items: item = [],
@@ -76,7 +95,7 @@ const parseJson = (data, options) => {
     link: purifyUrl(homepageUrl) || absolutify(baseUrl, homepageUrl),
     description,
     language,
-    published: '',
+    published: pubDate || modDate,
     generator: '',
     ...extraFields,
     entries: items.map((item) => {
@@ -85,6 +104,13 @@ const parseJson = (data, options) => {
   }
 }
 
+/**
+ * Parse JSON Feed data from a JSON object.
+ *
+ * @param {Object} data - JSON Feed object
+ * @param {Object} [options={}] - Parser options
+ * @returns {Object} Normalized or raw feed data
+ */
 export default (data, options = {}) => {
   return parseJson(data, options)
 }
diff --git a/src/utils/parseRdfFeed.js b/src/utils/parseRdfFeed.js
index c857a90..3d64c31 100644
--- a/src/utils/parseRdfFeed.js
+++ b/src/utils/parseRdfFeed.js
@@ -12,6 +12,13 @@ import {
   getEntryId
 } from './normalizer.js'
 
+/**
+ * Transform a single RDF item into a normalized entry object.
+ *
+ * @param {Object} item - Raw RDF item from parsed XML
+ * @param {Object} options - Parser options
+ * @returns {Object} Normalized entry with id, title, link, published, description
+ */
 const transform = (item, options) => {
   const {
     useISODateFormat,
@@ -47,6 +54,15 @@ const transform = (item, options) => {
   }
 }
 
+/**
+ * Flatten raw RDF feed data without normalization.
+ *
+ * Preserves original structure while cleaning text and links.
+ *
+ * @param {Object} feed - Raw RDF channel data
+ * @param {string} baseUrl - Base URL for resolving relative links
+ * @returns {Object} Feed data with cleaned entries
+ */
 const flatten = (feed, baseUrl) => {
   const {
     title = '',
@@ -80,6 +96,15 @@ const flatten = (feed, baseUrl) => {
   return output
 }
 
+/**
+ * Parse and normalize RDF/RSS 1.0 feed data into a standard structure.
+ *
+ * When `normalization` is false, returns flattened raw data instead.
+ *
+ * @param {Object} data - Parsed RDF XML object
+ * @param {Object} [options={}] - Parser options
+ * @returns {Object} Normalized feed object with entries array
+ */
 const parseRdf = (data, options = {}) => {
   const {
     normalization,
@@ -124,6 +149,13 @@ const parseRdf = (data, options = {}) => {
   }
 }
 
+/**
+ * Parse RDF/RSS 1.0 feed data from a parsed XML object.
+ *
+ * @param {Object} data - Parsed RDF XML object
+ * @param {Object} [options={}] - Parser options
+ * @returns {Object} Normalized or flattened feed data
+ */
 export default (data, options = {}) => {
   return parseRdf(data, options)
 }
diff --git a/src/utils/parseRssFeed.js b/src/utils/parseRssFeed.js
index 27b5e3c..1068175 100644
--- a/src/utils/parseRssFeed.js
+++ b/src/utils/parseRssFeed.js
@@ -13,6 +13,13 @@ import {
   getEntryId
 } from './normalizer.js'
 
+/**
+ * Transform a single RSS item into a normalized entry object.
+ *
+ * @param {Object} item - Raw RSS item from parsed XML
+ * @param {Object} options - Parser options
+ * @returns {Object} Normalized entry with id, title, link, published, description
+ */
 const transform = (item, options) => {
   const {
     useISODateFormat,
@@ -48,6 +55,15 @@ const transform = (item, options) => {
   }
 }
 
+/**
+ * Flatten raw RSS feed data without normalization.
+ *
+ * Preserves original structure while cleaning text and links.
+ *
+ * @param {Object} feed - Raw RSS channel data
+ * @param {string} baseUrl - Base URL for resolving relative links
+ * @returns {Object} Feed data with cleaned entries
+ */
 const flatten = (feed, baseUrl) => {
   const {
     title = '',
@@ -69,7 +85,7 @@ const flatten = (feed, baseUrl) => {
       link: getPureUrl(link, id, baseUrl),
     }
 
-    const txtTags = 'guid description source'.split(' ')
+    const txtTags = 'guid description'.split(' ')
 
     txtTags.forEach((key) => {
       if (hasProperty(entry, key)) {
@@ -80,7 +96,7 @@ const flatten = (feed, baseUrl) => {
     const optionalProps = 'source category enclosure author image'.split(' ')
     optionalProps.forEach((key) => {
       if (hasProperty(item, key)) {
-        entry[key] = getOptionalTags(item[key], key)
+        item[key] = getOptionalTags(item[key], key)
       }
     })
 
@@ -96,6 +112,15 @@ const flatten = (feed, baseUrl) => {
   return output
 }
 
+/**
+ * Parse and normalize RSS 2.0 feed data into a standard structure.
+ *
+ * When `normalization` is false, returns flattened raw data instead.
+ *
+ * @param {Object} data - Parsed RSS XML object
+ * @param {Object} [options={}] - Parser options
+ * @returns {Object} Normalized feed object with entries array
+ */
 const parseRss = (data, options = {}) => {
   const {
     normalization,
@@ -139,6 +164,13 @@ const parseRss = (data, options = {}) => {
   }
 }
 
+/**
+ * Parse RSS 2.0 feed data from a parsed XML object.
+ *
+ * @param {Object} data - Parsed RSS XML object
+ * @param {Object} [options={}] - Parser options
+ * @returns {Object} Normalized or flattened feed data
+ */
 export default (data, options = {}) => {
   return parseRss(data, options)
 }
diff --git a/src/utils/retrieve.js b/src/utils/retrieve.js
index 4de4d12..c66f1a3 100755
--- a/src/utils/retrieve.js
+++ b/src/utils/retrieve.js
@@ -1,21 +1,37 @@
 // utils -> retrieve
 
-import fetch from 'cross-fetch'
 import { XMLParser } from 'fast-xml-parser'
 
+/**
+ * Fetch feed content through a proxy endpoint.
+ *
+ * Appends the URL-encoded feed URL to the proxy `target` string.
+ * Merges request headers with proxy-specific headers (proxy headers take precedence).
+ *
+ * @param {string} url - Feed URL to fetch
+ * @param {Object} [options={}] - Fetch options including proxy config, headers, agent, signal
+ * @returns {Promise<Response>} Fetch response object
+ */
 const profetch = async (url, options = {}) => {
-  const { proxy = {}, signal = null } = options
+  const { proxy = {}, headers = {}, agent = null, signal = null } = options
   const {
     target,
-    headers = {},
+    headers: proxyHeaders = {},
   } = proxy
   const res = await fetch(target + encodeURIComponent(url), {
-    headers,
+    headers: { ...headers, ...proxyHeaders },
+    agent,
     signal,
   })
   return res
 }
 
+/**
+ * Extract charset encoding from the first line of an XML document.
+ *
+ * @param {string} text - Raw XML text
+ * @returns {string} Detected charset or 'utf8'
+ */
 const getCharsetFromText = (text) => {
   try {
     const firstLine = text.split('\n')[0].trim().replace('<?', '<').replace('?>', '>')
@@ -30,6 +46,17 @@ const getCharsetFromText = (text) => {
   }
 }
 
+/**
+ * Fetch and detect feed content from a URL.
+ *
+ * Returns structured data indicating whether the response is XML or JSON,
+ * along with decoded text and content metadata.
+ *
+ * @param {string} url - Feed URL to retrieve
+ * @param {Object} [options={}] - Fetch options (headers, proxy, agent, signal)
+ * @returns {Promise<Object>} Object with `type`, `text` or `json`, `status`, `contentType`
+ * @throws {Error} On HTTP errors, invalid content types, or parse failures
+ */
 export default async (url, options = {}) => {
   const {
     headers = {
@@ -40,7 +67,9 @@ export default async (url, options = {}) => {
     signal = null,
   } = options
 
-  const res = proxy ? await profetch(url, { proxy, signal }) : await fetch(url, { headers, agent, signal })
+  const res = proxy
+    ? await profetch(url, { proxy, headers, agent, signal })
+    : await fetch(url, { headers, agent, signal })
 
   const status = res.status
   if (status >= 400) {
@@ -50,14 +79,6 @@ export default async (url, options = {}) => {
   const buffer = await res.arrayBuffer()
   const text = buffer ? Buffer.from(buffer).toString().trim() : ''
 
-  if (/(\+|\/)(xml|html)/.test(contentType)) {
-    const arr = contentType.split('charset=')
-    let charset = arr.length === 2 ? arr[1].trim() : getCharsetFromText(text)
-    const decoder = new TextDecoder(charset)
-    const xml = decoder.decode(buffer)
-    return { type: 'xml', text: xml.trim(), status, contentType }
-  }
-
   if (/(\+|\/)json/.test(contentType)) {
     try {
       const data = JSON.parse(text)
@@ -66,5 +87,22 @@ export default async (url, options = {}) => {
       throw new Error('Failed to convert data to JSON object')
     }
   }
+
+  // The content-type header may be absent (Headers.get() then returns null),
+  // so fall back to an empty string instead of calling split() on null.
+  const arr = (contentType || '').split('charset=')
+  let charset = arr.length === 2 ? arr[1].trim() : getCharsetFromText(text)
+  const decoder = new TextDecoder(charset)
+  const xml = decoder.decode(buffer).trim()
+
+  // Opening-tag prefixes used to recognise XML feeds served with a wrong or
+  // missing content type. No trailing slash: real documents start with
+  // `<rss version=...>` or `<feed xmlns=...>`, never `<rss/` or `<feed/`.
+  const startTokens = ['<?xml', '<rss', '<feed', '<rdf:']
+
+  if (/(\+|\/)(xml|html)/.test(contentType) || startTokens.some(x => xml.startsWith(x))) {
+    return { type: 'xml', text: xml, status, contentType }
+  }
+
   throw new Error(`Invalid content type: ${contentType}`)
 }
diff --git a/src/utils/xmlparser.js b/src/utils/xmlparser.js
index a82e9fe..f6d6fac 100755
--- a/src/utils/xmlparser.js
+++ b/src/utils/xmlparser.js
@@ -4,22 +4,55 @@ import { hasProperty, isString } from '@ndaidong/bellajs'
 
 import { XMLValidator, XMLParser } from 'fast-xml-parser'
 
+/**
+ * Check if parsed data represents an RSS 2.0 feed.
+ *
+ * @param {Object} [data={}] - Parsed XML object
+ * @returns {boolean} True if data has `rss.channel` structure
+ */
 export const isRSS = (data = {}) => {
   return hasProperty(data, 'rss') && hasProperty(data.rss, 'channel')
 }
 
+/**
+ * Check if parsed data represents an Atom feed.
+ *
+ * @param {Object} [data={}] - Parsed XML object
+ * @returns {boolean} True if data has `feed.entry` structure
+ */
 export const isAtom = (data = {}) => {
   return hasProperty(data, 'feed') && hasProperty(data.feed, 'entry')
 }
 
+/**
+ * Check if parsed data represents an RDF/RSS 1.0 feed.
+ *
+ * @param {Object} [data={}] - Parsed XML object
+ * @returns {boolean} True if data has `rdf:RDF.channel` structure
+ */
 export const isRdf = (data = {}) => {
   return hasProperty(data, 'rdf:RDF') && hasProperty(data['rdf:RDF'], 'channel')
 }
 
+/**
+ * Validate whether an XML string is well-formed.
+ *
+ * @param {string} xml - XML string to validate
+ * @returns {boolean} True if XML is well-formed
+ */
 export const validate = (xml) => {
   return (!isString(xml) || !xml.length) ? false : XMLValidator.validate(xml) === true
 }
 
+/**
+ * Parse an XML string into a JavaScript object.
+ *
+ * Uses fast-xml-parser with `ignoreAttributes: false` and `attributeNamePrefix: '@_'`.
+ *
+ * @param {string} [xml=''] - XML string to parse
+ * @param {Object} [extraOptions={}] - Additional parser options
+ * @returns {Object} Parsed JavaScript object
+ */
 export const xml2obj = (xml = '', extraOptions = {}) => {
   const options = {
     attributeNamePrefix: '@_',

From 849b5e0f4b1d40908fc9e12a505419925e310af7 Mon Sep 17 00:00:00 2001
From: Dong Nguyen <ndaidong@gmail.com>
Date: Sun, 3 May 2026 14:13:45 +0700
Subject: [PATCH 2/4] Stop building CJS in CI

---
 .github/workflows/ci-test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci-test.yml b/.github/workflows/ci-test.yml
index 29c8f67..d6c6a5f 100755
--- a/.github/workflows/ci-test.yml
+++ b/.github/workflows/ci-test.yml
@@ -26,7 +26,7 @@ jobs:
       run: |
         npm install
         npm run lint
-        npm run build --if-present
+        #npm run build --if-present
         npm run test
 
     - name: cache node modules

From 23d1224f5099335620cb9e606719659a914753a6 Mon Sep 17 00:00:00 2001
From: Dong Nguyen <ndaidong@gmail.com>
Date: Sun, 3 May 2026 14:22:07 +0700
Subject: [PATCH 3/4] Fix old issues.

- Relative links not resolved with baseUrl (#135)
- Atom <subtitle> / feed descriptions returned as objects (#137)
---
 src/utils/parseAtomFeed.js | 2 +-
 src/utils/parseRdfFeed.js  | 4 ++--
 src/utils/parseRssFeed.js  | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/utils/parseAtomFeed.js b/src/utils/parseAtomFeed.js
index 94df305..a30c025 100644
--- a/src/utils/parseAtomFeed.js
+++ b/src/utils/parseAtomFeed.js
@@ -150,7 +150,7 @@ const parseAtom = (data, options = {}) => {
   return {
     title: getText(title),
     link: getPureUrl(link, id, baseUrl),
-    description: subtitle,
+    description: getText(subtitle),
     language,
     generator,
     published,
diff --git a/src/utils/parseRdfFeed.js b/src/utils/parseRdfFeed.js
index 3d64c31..190c799 100644
--- a/src/utils/parseRdfFeed.js
+++ b/src/utils/parseRdfFeed.js
@@ -90,7 +90,7 @@ const flatten = (feed, baseUrl) => {
   const output = {
     ...feed,
     title: getText(title),
-    link: getPureUrl(link, baseUrl),
+    link: getPureUrl(link, '', baseUrl),
     item: isArray(item) ? entries : entries[0],
   }
   return output
@@ -138,7 +138,7 @@ const parseRdf = (data, options = {}) => {
   return {
     title: getText(title),
     link: getPureUrl(link, '', baseUrl),
-    description,
+    description: getText(description),
     language,
     generator,
     published,
diff --git a/src/utils/parseRssFeed.js b/src/utils/parseRssFeed.js
index 1068175..247120a 100644
--- a/src/utils/parseRssFeed.js
+++ b/src/utils/parseRssFeed.js
@@ -106,7 +106,7 @@ const flatten = (feed, baseUrl) => {
   const output = {
     ...feed,
     title: getText(title),
-    link: getPureUrl(link, baseUrl),
+    link: getPureUrl(link, '', baseUrl),
     item: isArray(item) ? entries : entries[0],
   }
   return output
@@ -153,7 +153,7 @@ const parseRss = (data, options = {}) => {
   return {
     title: getText(title),
     link: getPureUrl(link, '', baseUrl),
-    description,
+    description: getText(description),
     language,
     generator,
     published,

From 373a55f399517d1a7ae17bc46e0fdbcdb9558c20 Mon Sep 17 00:00:00 2001
From: Dong Nguyen <ndaidong@gmail.com>
Date: Sun, 3 May 2026 14:30:01 +0700
Subject: [PATCH 4/4] Use TextDecoder over Buffer

Related issue: #133
---
 src/utils/retrieve.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/utils/retrieve.js b/src/utils/retrieve.js
index c66f1a3..daad8f4 100755
--- a/src/utils/retrieve.js
+++ b/src/utils/retrieve.js
@@ -77,7 +77,7 @@ export default async (url, options = {}) => {
   }
   const contentType = res.headers.get('content-type')
   const buffer = await res.arrayBuffer()
-  const text = buffer ? Buffer.from(buffer).toString().trim() : ''
+  const text = buffer ? new TextDecoder().decode(buffer).trim() : ''
 
   if (/(\+|\/)json/.test(contentType)) {
     try {