From dbcdb719b2159365564c5cd005c81ff1fa77d4ac Mon Sep 17 00:00:00 2001 From: alistairjcbrown <635903+alistairjcbrown@users.noreply.github.com> Date: Tue, 30 Jun 2026 17:48:08 +0000 Subject: [PATCH] Update title normalization test --- cinemas/davidleancinema.org.uk/transform.js | 5 +- cinemas/lumiereromford.com/transform.js | 6 +- common/get-movie-data.js | 1 + common/known-removable-phrases.js | 2 + common/normalize-title.js | 3 +- common/tests/test-titles.json | 336 ++++++++++++++++++++ 6 files changed, 350 insertions(+), 3 deletions(-) diff --git a/cinemas/davidleancinema.org.uk/transform.js b/cinemas/davidleancinema.org.uk/transform.js index 3872d833..8fc132ba 100644 --- a/cinemas/davidleancinema.org.uk/transform.js +++ b/cinemas/davidleancinema.org.uk/transform.js @@ -163,7 +163,10 @@ function toMovie($, showEl) { async function transform({ movieListPage }, sourcedEvents) { const $ = cheerio.load(movieListPage, { xmlMode: true }); const shows = $("venues > venue > shows > show").toArray(); - const movies = shows.map((el) => toMovie($, el)); + const movies = shows + .map((el) => toMovie($, el)) + // Remove template placeholder entries (e.g. "Templates:", "Template - ") + .filter(({ title }) => !/^Templates?\s*[:-]/i.test(title)); if (movies.length === 0) { throw new Error("No movies found - the page structure may have changed"); diff --git a/cinemas/lumiereromford.com/transform.js b/cinemas/lumiereromford.com/transform.js index fc61b61d..513d84fe 100644 --- a/cinemas/lumiereromford.com/transform.js +++ b/cinemas/lumiereromford.com/transform.js @@ -4,10 +4,14 @@ const cinesyncTransform = require("../../common/cinesync.io/transform"); async function transform(data, sourcedEvents) { const events = await cinesyncTransform(attributes, data, sourcedEvents); return events.filter(({ title }) => { + const normalized = title.trim().toLowerCase(); // Remove Basking Babies events (e.g. "Baby Yoga Classes") // "At Lumiere, we are more than just a cinema, we are a community space for // a variety of activities - from art classes to meditation to exercise." - return !title.trim().toLowerCase().endsWith("basking babies"); + if (normalized.endsWith("basking babies")) return false; + // Remove placeholder test entry + if (normalized === "test film") return false; + return true; }); } diff --git a/common/get-movie-data.js b/common/get-movie-data.js index a47e8303..5f647403 100644 --- a/common/get-movie-data.js +++ b/common/get-movie-data.js @@ -58,6 +58,7 @@ const ignoredIds = [ 41233, // Step Up 3D -- https://www.themoviedb.org/movie/41233-step-up-3d 129284, // The Dude -- https://www.themoviedb.org/movie/129284-the-dude 1666176, // Inferno -- https://www.themoviedb.org/movie/1666176-inferno + 1662113, // Test Film -- https://www.themoviedb.org/movie/1662113-test-film ]; /** diff --git a/common/known-removable-phrases.js b/common/known-removable-phrases.js index f24b7879..3bba6ca9 100644 --- a/common/known-removable-phrases.js +++ b/common/known-removable-phrases.js @@ -517,6 +517,7 @@ const knownRemovablePhrases = [ "Barbican Library Rhyme Time", "Baz Luhrmann Season", "Baz Luhrmann's", + "Bedazzle Club", "Best Of X-Men:", "Best of 2025:", "Belgrade Noir:", @@ -1292,6 +1293,7 @@ const knownRemovablePhrases = [ "UK Theatrical Premiere", "Exclusive UK Premiere", "UK Premiere", + "World Theatrical Premiere", "World Premiere", "Premiere for", "the premiere", diff --git a/common/normalize-title.js b/common/normalize-title.js index c6852e63..9653d24d 100644 --- a/common/normalize-title.js +++ b/common/normalize-title.js @@ -522,6 +522,7 @@ function normalizeTitle(title, options) { "mystery movie", ], [/^.* \+ mystery movie/i, "mystery movie"], + [/Surprise Film (\d{1,2}\.\d{1,2}\.\d{1,2})?/i, "mystery movie"], ["vhs film", "movie"], [/The Bill Reunion \d+/i, "The Bill Reunion"], ["R.E.M. Buster", "R.E.M. X Buster"], @@ -848,7 +849,7 @@ function normalizeTitle(title, options) { "What's Up Daiquiris, bag switcheroos: ", ], ["One Day in Whitechapel + ", "One Day in Whitechapel & "], - ["Backrooms: Everything Must Go", "Backrooms"], + [/Backrooms\s*:\s+Everything Must Go( Bonus)?( Edition)?/i, "Backrooms"], ]; corrections.forEach(([phrase, replacement]) => { diff --git a/common/tests/test-titles.json b/common/tests/test-titles.json index 7add2189..d418732b 100644 --- a/common/tests/test-titles.json +++ b/common/tests/test-titles.json @@ -79134,5 +79134,341 @@ { "input": "TFFF: SOFTSHELL + Q&A", "output": "softshell" + }, + { + "input": "ROSEBUSH PRUNING", + "output": "rosebush pruning" + }, + { + "input": "THE LAST ONE FOR THE ROAD", + "output": "last one for the road" + }, + { + "input": "Members' Screening: The Invite", + "output": "the invite" + }, + { + "input": "Parent & Baby Screening: The Invite", + "output": "the invite" + }, + { + "input": "The Undeclared War Series 2 + Q&A with creator Colin Callender, cast member Simon Pegg and more", + "output": "undeclared war series 2" + }, + { + "input": "UK Premiere of 4K Restoration: In Which Annie Gives It Those Ones + Q&A with Arundhati Roy", + "output": "in which annie gives it those ones" + }, + { + "input": "Mulan", + "output": "mulan" + }, + { + "input": "£2 Family Films : Charlie The Wonderdog", + "output": "charlie the wonderdog" + }, + { + "input": "Backrooms : Everything Must Go with Bonus Footage", + "output": "backrooms" + }, + { + "input": "Roll No. 1 (Nepali)", + "output": "roll no 1" + }, + { + "input": "Scorsese Season: Cape Fear (1991)", + "output": "cape fear (1991)" + }, + { + "input": "Scorsese Season: Casino (1995)", + "output": "casino (1995)" + }, + { + "input": "Scorsese Season: Goodfellas (1990)", + "output": "goodfellas (1990)" + }, + { + "input": "Scorsese Season: Raging Bull (1980)", + "output": "raging bull (1980)" + }, + { + "input": "Scorsese Season: Shutter Island (2009)", + "output": "shutter island (2009)" + }, + { + "input": "Scorsese Season: Taxi Driver (50th Anniversary)", + "output": "taxi driver" + }, + { + "input": "Scorsese Season: The Wolf Of Wall Street (2013)", + "output": "wolf of wall street (2013)" + }, + { + "input": "Scorsese Season: The King Of Comedy (1983)", + "output": "king of comedy (1983)" + }, + { + "input": "Deool Band 2 (Marathi)", + "output": "deool band 2" + }, + { + "input": "Duja Rabb (Punjabi)", + "output": "duja rabb" + }, + { + "input": "Gatta Kusthi 2 (Tamil)", + "output": "gatta kusthi 2" + }, + { + "input": "I, Nobody (Malayalam)", + "output": "i nobody" + }, + { + "input": "Uyir (Malayalam)", + "output": "uyir" + }, + { + "input": "BACKROOMS: EVERYTHING MUST GO WITH BONUS FOOTAGE", + "output": "backrooms" + }, + { + "input": "Derek vs Derek", + "output": "derek vs derek" + }, + { + "input": "TRANSMISSIONS PRESENTS: Sightseers", + "output": "sightseers" + }, + { + "input": "COME SEE ME IN THE GOOD LIGHT", + "output": "come see me in the good light" + }, + { + "input": "FAME (Dementia-Friendly)", + "output": "fame" + }, + { + "input": "FINDING EMILY (Babes-In-Arms Screening)", + "output": "finding emily" + }, + { + "input": "ISH", + "output": "ish" + }, + { + "input": "MINIONS & MONSTERS", + "output": "minions monsters" + }, + { + "input": "NINO", + "output": "nino" + }, + { + "input": "NT LIVE: LES LIAISONS DANGEREUSES", + "output": "national theatre live les liaisons dangereuses" + }, + { + "input": "Parent and Baby: The Invite", + "output": "the invite" + }, + { + "input": "Marvel Studios' Infinity Saga Concert Experience", + "output": "marvel studios infinity saga concert experience" + }, + { + "input": "Sleepless in Seattle in Concert", + "output": "sleepless in seattle" + }, + { + "input": "Backrooms: Everything Must Go Edition w/ Bonus Footage", + "output": "backrooms" + }, + { + "input": "Screening + Q&A: Inside the Manosphere with Louis Theroux", + "output": "inside the manosphere with louis theroux" + }, + { + "input": "Screening + Q&A: Into the Darkness, Life Inside Israeli Prisons", + "output": "into the darkness life inside israeli prisons" + }, + { + "input": "Black God, White Devil (1964, Glauber Rocha) 4k Restoration - TFFF", + "output": "black god white devil" + }, + { + "input": "Faces of Death (1978) world theatrical premiere + Video intro by Daniel Goldhaber - TFFF", + "output": "faces of death (1978)" + }, + { + "input": "Free Cinema London: Watch an Award-Winning Film for Fun Research", + "output": "free cinema london watch an awardwinning film for fun research" + }, + { + "input": "A Goodnight Kiss", + "output": "a goodnight kiss" + }, + { + "input": "Algiers", + "output": "algiers" + }, + { + "input": "Deool Band 2", + "output": "deool band 2" + }, + { + "input": "Chicken Run (2000)", + "output": "chicken run (2000)" + }, + { + "input": "Early Man (2018)", + "output": "early man (2018)" + }, + { + "input": "Flushed Away (2006)", + "output": "flushed away (2006)" + }, + { + "input": "Wallace & Gromit: A Grand Day Out/The Wrong Trousers", + "output": "wallace gromit a grand day out/the wrong trousers" + }, + { + "input": "Wallace & Gromit: The Curse of the Were-Rabbit (2005)", + "output": "wallace gromit the curse of the wererabbit (2005)" + }, + { + "input": "Backrooms: Everything Must Go w/ Bonus Footage", + "output": "backrooms" + }, + { + "input": "Gatta Kusthi 2", + "output": "gatta kusthi 2" + }, + { + "input": "I, Nobody", + "output": "i nobody" + }, + { + "input": "Backrooms: Everything Must Go Edition with Bonus Footage", + "output": "backrooms" + }, + { + "input": "Backrooms: Everything must Go Bonus Edition", + "output": "backrooms" + }, + { + "input": "Nirvanna:The Band, The Show, The Movie", + "output": "nirvanna the band the show" + }, + { + "input": "TFFF: Captain America", + "output": "captain america" + }, + { + "input": "TFFF: City Wide Fever", + "output": "city wide fever" + }, + { + "input": "TFFF: Godmonster of Indian Flats", + "output": "godmonster of indian flats" + }, + { + "input": "TFFF: Hellbreeder", + "output": "hellbreeder" + }, + { + "input": "TFFF: Nemesis", + "output": "nemesis" + }, + { + "input": "TFFF: Surprise Film 7.8.26", + "output": "mystery movie" + }, + { + "input": "TFFF: Surprise Film 9.8.26", + "output": "mystery movie" + }, + { + "input": "Parents & Baby Screening: The Invite", + "output": "the invite" + }, + { + "input": "Phoenix Classics: Taxi Driver (50th Anniversary)", + "output": "taxi driver" + }, + { + "input": "Brunello: The Gracious Visionary", + "output": "brunello the gracious visionary" + }, + { + "input": "Jet Lag: Japanorama Livestream Premiere", + "output": "jet lag japanorama" + }, + { + "input": "Out at Clapham: WHAM! 10 Days in China", + "output": "wham 10 days in china" + }, + { + "input": "CLUB ROOM: Sherlock Jr. With Live Score", + "output": "sherlock jr" + }, + { + "input": "LAFF 2026: Red Dog", + "output": "red dog" + }, + { + "input": "Hobsons Choice", + "output": "hobsons choice" + }, + { + "input": "Room At the Top", + "output": "room at the top" + }, + { + "input": "Film Stories Live with Simon Brew", + "output": "film stories live with simon brew" + }, + { + "input": "Princess Diaries Bedazzle Club", + "output": "princess diaries" + }, + { + "input": "A Year in London", + "output": "a year in london" + }, + { + "input": "Preview: Warla + Q&A", + "output": "warla" + }, + { + "input": "Pride in Ordinary Times: Contemporary Chinese Queer Shorts + Panel Discussion", + "output": "pride in ordinary times contemporary chinese queer shorts" + }, + { + "input": "The Serpent's Skin", + "output": "serpents skin" + }, + { + "input": "UK Premiere: A Year in London + Q&A", + "output": "a year in london" + }, + { + "input": "Pitchblack Playback: Thom Yorke 'The Eraser' (20th Anniversary)", + "output": "thom yorke the eraser" + }, + { + "input": "Collage making workshop: Summer", + "output": "collage making workshop summer" + }, + { + "input": "Disappearing Images (4): Existers - WHEN IT'S GONE IT'S GONE", + "output": "disappearing images" + }, + { + "input": "TFFF: AFFECTION AFFECTION + Q&A", + "output": "affection affection" + }, + { + "input": "TFFF: ERAMUS + Q&A", + "output": "eramus" } ]