diff --git a/src/_shared/utils/applyApTitleCase.test.ts b/src/_shared/utils/applyApTitleCase.test.ts index 25ce8c247..0d238b910 100644 --- a/src/_shared/utils/applyApTitleCase.test.ts +++ b/src/_shared/utils/applyApTitleCase.test.ts @@ -1,4 +1,4 @@ -import { applyApTitleCase, lowercaseAfterApostrophe } from './applyApTitleCase'; +import { applyApTitleCase } from './applyApTitleCase'; // examples taken from https://www.grammarly.com/blog/capitalization-in-the-titles/ // tested at https://headlinecapitalization.com/ (AP style) @@ -137,6 +137,106 @@ describe('applyApTitleCase', () => { expect(applyApTitleCase(swc.result)).toEqual(swc.expected); }); }); + + it('should handle new AP style stop words correctly', () => { + const testCases = [ + { + result: 'The Dog Jumped Up And Over The Fence', + expected: 'The Dog Jumped Up and Over the Fence', + }, + { + result: 'Work As A Team', + expected: 'Work as a Team', + }, + { + result: 'If You Can Dream It', + expected: 'If You Can Dream It', + }, + { + result: 'Turn Off The Lights', + expected: 'Turn off the Lights', + }, + { + result: 'Going Out Tonight', + expected: 'Going out Tonight', + }, + { + result: 'So What Do You Think', + expected: 'So What Do You Think', + }, + { + result: 'Come If You Can So We Can Talk', + expected: 'Come if You Can so We Can Talk', + }, + { + result: + "'ridiculously traumatized': CDC workers fear returning to work after fatal shooting", + expected: + "'Ridiculously Traumatized': CDC Workers Fear Returning to Work After Fatal Shooting", + }, + { + result: + "'shock. frustration. anger.' Trump's tariff letters roil Asian allies", + expected: + "'Shock. Frustration. Anger.' 
Trump's Tariff Letters Roil Asian Allies", + }, + { + result: + "'alligator alcatraz': what to know about Florida's new controversial migrant detention facility", + expected: + "'Alligator Alcatraz': What to Know About Florida's New Controversial Migrant Detention Facility", + }, + { + result: + "'arrogant' Arsenal star, five Man Utd flops, only one Spurs man in 'big eight' worst XI of 2024/25", + expected: + "'Arrogant' Arsenal Star, Five Man Utd Flops, Only One Spurs Man in 'Big Eight' Worst XI of 2024/25", + }, + { + result: + "'a graceful, magnetic speaker': how Susie Sorabji captivated U.S. audiences in early 20th century", + expected: + "'A Graceful, Magnetic Speaker': How Susie Sorabji Captivated U.S. Audiences in Early 20th Century", + }, + { + result: + "'beyond my wildest dreams': the architect of Project 2025 is ready for his victory lap", + expected: + "'Beyond My Wildest Dreams': The Architect of Project 2025 Is Ready for His Victory Lap", + }, + { + result: "'anora' lands top prize at PGA awards: full winners list", + expected: "'Anora' Lands Top Prize at PGA Awards: Full Winners List", + }, + { + result: + '7 binge-worthy YouTube series that are worth spending hours watching', + expected: + '7 Binge-Worthy YouTube Series That Are Worth Spending Hours Watching', + }, + { + result: + 'as insurers around the U.S. bleed cash from climate shocks, homeowners lose', + expected: + 'As Insurers Around the U.S. 
Bleed Cash From Climate Shocks, Homeowners Lose', + }, + { + result: + "'love actually' is a holiday must-watch for many — but these subplots haven't aged well", + expected: + "'Love Actually' Is a Holiday Must-Watch for Many — but These Subplots Haven't Aged Well", + }, + { + result: + "'iPhones are made in hell': 3 months inside China's iPhone city", + expected: + "'iPhones Are Made in Hell': 3 Months Inside China's iPhone City", + }, + ]; + testCases.forEach(({ result, expected }) => { + expect(applyApTitleCase(result)).toEqual(expected); + }); + }); it('should correctly format titles with curly apostrophes', () => { const testCases = [ { @@ -148,22 +248,40 @@ describe('applyApTitleCase', () => { expect(applyApTitleCase(result)).toEqual(expected); }); }); -}); -describe('lowercaseAfterApostrophe', () => { - it('lowercase letter after apostrophe & return new string', () => { - const result = lowercaseAfterApostrophe("foo'S"); - expect(result).toEqual("foo's"); - }); - it('lowercase letter after apostrophe, ignore string in quotes, & return new string', () => { - const result = lowercaseAfterApostrophe( - "'Foo' foo'S DaY's You'Ll 'foo Bar foo'Ss'", - ); - expect(result).toEqual("'Foo' foo's DaY's You'll 'foo Bar foo'ss'"); - }); - it('should lowercase the letter after a curly apostrophe', () => { - const input = 'Every State\u2018S Dream Travel Destination, Mapped'; - const expected = 'Every State\u2018s Dream Travel Destination, Mapped'; - expect(lowercaseAfterApostrophe(input)).toEqual(expected); + it('should keep iPhone and similar Apple products with lowercase i', () => { + const testCases = [ + { + result: 'The New IPhone Is Here', + expected: 'The New iPhone Is Here', + }, + { + result: 'IPad Pro Vs IPad Air', + expected: 'iPad Pro vs iPad Air', + }, + { + result: 'Using ICloud With Your IPod', + expected: 'Using iCloud With Your iPod', + }, + { + result: 'IMac and MacBook Pro Comparison', + expected: 'iMac and MacBook Pro Comparison', + }, + { + result: 
'ITunes Is Now Apple Music', + expected: 'iTunes Is Now Apple Music', + }, + { + result: 'Send IMessage From Your IPhone', + expected: 'Send iMessage From Your iPhone', + }, + { + result: 'IBooks: The Complete Guide', + expected: 'iBooks: The Complete Guide', + }, + ]; + testCases.forEach(({ result, expected }) => { + expect(applyApTitleCase(result)).toEqual(expected); + }); }); }); diff --git a/src/_shared/utils/applyApTitleCase.ts b/src/_shared/utils/applyApTitleCase.ts index 65ef7b360..eb1377132 100644 --- a/src/_shared/utils/applyApTitleCase.ts +++ b/src/_shared/utils/applyApTitleCase.ts @@ -1,83 +1,107 @@ -export const STOP_WORDS = - 'a an and at but by for in nor of on or the to up yet'; +/** + * AP Style Title Case Implementation + * Follows Associated Press style guide for headline capitalization + * Reference: https://headlinecapitalization.com/ + */ + +// Words that should remain lowercase unless they start/end the title or follow a colon +const STOP_WORDS = new Set([ + 'a', + 'an', + 'and', + 'as', + 'at', + 'but', + 'by', + 'for', + 'if', + 'in', + 'nor', + 'of', + 'off', + 'on', + 'or', + 'out', + 'so', + 'the', + 'to', + 'vs', + 'yet', +]); -// Matches a colon (:) and 0+ white spaces following after -// Matches 1+ white spaces -// Matches special chars (i.e. hyphens, quotes, etc) -export const SEPARATORS = /(:\s*|\s+|[-‑–—,:;!?()“”'‘"])/; // Include curly quotes as separators +// Regex to split text while preserving separators (punctuation, spaces, etc.) +const SEPARATORS = /(:\s*|\s+|[-‑–—,:;!?()"'\u201C\u201D\u2018\u2019])/; -export const stop = STOP_WORDS.split(' '); +// Quote characters that trigger capitalization of the following word +const QUOTES = new Set(['"', "'", '\u2018', '\u201C']); + +// Pattern to lowercase letters after apostrophes (except Irish names like O'Brien) +// Handles both ASCII and Unicode apostrophes +const APOSTROPHES = /(? { - // Match either an ASCII or curly apostrophe followed by a letter, after a word character. 
- const regex = /(?<=\w)(['\u2018\u2019])(\w)/g; - return input.replace( - regex, - (_, apostrophe, letter) => `${apostrophe}${letter.toLowerCase()}`, - ); -}; +const capitalize = (str: string): string => + str && str[0].toUpperCase() + str.slice(1); /** - * Capitalize first character for string + * Determines if a word should be capitalized based on AP style rules * - * @param {string} value - * @returns {string} + * Rules: + * 1. First and last words are always capitalized + * 2. Words after colons are capitalized + * 3. Words after opening quotes are capitalized + * 4. Stop words remain lowercase (unless rules 1-3 apply) */ -const capitalize = (value: string) => { - if (!value) { - return ''; - } - return value.charAt(0).toUpperCase() + value.slice(1); +const shouldCapitalize = ( + word: string, + index: number, + words: string[], +): boolean => { + // Always capitalize first and last words + if (index === 0 || index === words.length - 1) return true; + + const prevWord = words[index - 1]; + // Capitalize after colons and opening quotes + if (prevWord.trim() === ':' || QUOTES.has(prevWord)) return true; + + // Stop words remain lowercase + return !STOP_WORDS.has(word.toLowerCase()); }; /** - * Helper to convert text to AP title case - * adapted from https://github.com/words/ap-style-title-case - * text should match https://headlinecapitalization.com/ + * Applies AP style title case to a string + * + * @param value - The string to transform + * @returns Title-cased string following AP style guide * - * @param {string} [value] - * @returns {string} + * @example + * applyApTitleCase("the quick brown fox jumps over the lazy dog") + * // Returns: "The Quick Brown Fox Jumps Over the Lazy Dog" + * + * @example + * applyApTitleCase("iPhone users: here's what you need to know") + * // Returns: "iPhone Users: Here's What You Need to Know" */ export const applyApTitleCase = (value: string): string => { - if (!value) { - return ''; - } - - // Split and filter empty strings - 
// Boolean here acts as a callback, evaluates each word: - // If it's a non-empty string, keep the word in the array; - // If it's an empty string (or falsy), remove from array. - const allWords = value.split(SEPARATORS).filter(Boolean); // Split and filter empty strings - - const result = allWords - .map((word, index, all) => { - const isAfterColon = index > 0 && all[index - 1].trim() === ':'; + if (!value) return ''; - const isAfterQuote = - index > 0 && - (allWords[index - 1] === "'" || - allWords[index - 1] === '"' || - allWords[index - 1] === '\u2018' || // Opening single quote ’ - allWords[index - 1] === '\u201C'); // Opening double quote “ + // Split into words while preserving all separators + const words = value.split(SEPARATORS).filter(Boolean); - if ( - index === 0 || // first word - index === all.length - 1 || // last word - isAfterColon || // capitalize the first word after a colon - isAfterQuote || // capitalize the first word after a quote - !stop.includes(word.toLowerCase()) // not a stop word - ) { - return capitalize(word); - } + // Apply title case rules to each word + const titleCased = words + .map((word, i) => + shouldCapitalize(word, i, words) ? 
capitalize(word) : word.toLowerCase(), + ) + .join(''); - return word.toLowerCase(); - }) - .join(''); // join without additional spaces - return lowercaseAfterApostrophe(result); + // Post-processing: handle special cases + return titleCased + .replace(APOSTROPHES, (_, apos, letter) => `${apos}${letter.toLowerCase()}`) + .replace(APPLE_PRODUCTS, 'i$1'); }; diff --git a/src/curated-corpus/components/actions/RemoveSectionItemAction/RemoveSectionItemAction.test.tsx b/src/curated-corpus/components/actions/RemoveSectionItemAction/RemoveSectionItemAction.test.tsx index 7c27b964a..297b11220 100644 --- a/src/curated-corpus/components/actions/RemoveSectionItemAction/RemoveSectionItemAction.test.tsx +++ b/src/curated-corpus/components/actions/RemoveSectionItemAction/RemoveSectionItemAction.test.tsx @@ -4,9 +4,7 @@ import { render, screen } from '@testing-library/react'; import { SnackbarProvider } from 'notistack'; import { RemoveSectionItemAction } from './RemoveSectionItemAction'; import { getTestApprovedItem } from '../../../helpers/approvedItem'; -import { - successMock, -} from '../../../integration-test-mocks/removeSectionItem'; +import { successMock } from '../../../integration-test-mocks/removeSectionItem'; import userEvent from '@testing-library/user-event'; import { apolloCache } from '../../../../api/client'; import { diff --git a/temp/applyApTitleCase.js b/temp/applyApTitleCase.js new file mode 100644 index 000000000..774767036 --- /dev/null +++ b/temp/applyApTitleCase.js @@ -0,0 +1,79 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.applyApTitleCase = exports.lowercaseAfterApostrophe = exports.stop = exports.SEPARATORS = exports.STOP_WORDS = void 0; +exports.STOP_WORDS = 'a an and as at but by for if in nor of off on or out so the to yet'; +// Matches a colon (:) and 0+ white spaces following after +// Matches 1+ white spaces +// Matches special chars (i.e. 
hyphens, quotes, etc) +exports.SEPARATORS = /(:\s*|\s+|[-‑–—,:;!?()“”'‘"])/; // Include curly quotes as separators +exports.stop = exports.STOP_WORDS.split(' '); +/** + * Format a string: Match the letter after an apostrophe & capture the apostrophe and matched char. + * Lowercase the captured letter & return the formatted string. + * Exception: O' prefix (like O'Hearn) should have the letter after apostrophe capitalized. + * @param input + * @returns {string} + */ +const lowercaseAfterApostrophe = (input) => { + // Match either an ASCII or curly apostrophe followed by a letter, after a word character. + // Negative lookbehind to exclude O' prefix + const regex = /(? `${apostrophe}${letter.toLowerCase()}`); +}; +exports.lowercaseAfterApostrophe = lowercaseAfterApostrophe; +/** + * Capitalize first character for string + * + * @param {string} value + * @returns {string} + */ +const capitalize = (value) => { + if (!value) { + return ''; + } + return value.charAt(0).toUpperCase() + value.slice(1); +}; +/** + * Helper to convert text to AP title case + * adapted from https://github.com/words/ap-style-title-case + * text should match https://headlinecapitalization.com/ + * + * @param {string} [value] + * @returns {string} + */ +const applyApTitleCase = (value) => { + if (!value) { + return ''; + } + // Split and filter empty strings + // Boolean here acts as a callback, evaluates each word: + // If it's a non-empty string, keep the word in the array; + // If it's an empty string (or falsy), remove from array. 
+ const allWords = value.split(exports.SEPARATORS).filter(Boolean); // Split and filter empty strings + const result = allWords + .map((word, index, all) => { + const isAfterColon = index > 0 && all[index - 1].trim() === ':'; + const isAfterQuote = index > 0 && + (allWords[index - 1] === "'" || + allWords[index - 1] === '"' || + allWords[index - 1] === '\u2018' || // Opening single quote ' + allWords[index - 1] === '\u201C'); // Opening double quote " + if (index === 0 || // first word + index === all.length - 1 || // last word + isAfterColon || // capitalize the first word after a colon + isAfterQuote || // capitalize the first word after a quote + !exports.stop.includes(word.toLowerCase()) // not a stop word + ) { + return capitalize(word); + } + return word.toLowerCase(); + }) + .join(''); // join without additional spaces + // Apply special formatting rules + let formattedResult = (0, exports.lowercaseAfterApostrophe)(result); + // Handle special cases like iPhone, iPad, iPod, etc. + // This regex looks for word boundaries followed by capital I and then Phone/Pad/Pod/etc. + formattedResult = formattedResult.replace(/\bI(Phone|Pad|Pod|Mac|Cloud|Tunes|Books|Message)/g, 'i$1'); + return formattedResult; +}; +exports.applyApTitleCase = applyApTitleCase;