From 0d645334cb40e5bd3f2b320ab5793f92595b875c Mon Sep 17 00:00:00 2001 From: Sarjuuk Date: Thu, 5 Mar 2026 11:37:55 +0100 Subject: [PATCH] Sitemap * generate and link basic sitemap --- endpoints/sitemap/sitemap.php | 52 +++++ includes/cfg.class.php | 2 +- includes/components/sitemap.class.php | 181 ++++++++++++++++++ setup/tools/clisetup/siteconfig.us.php | 2 +- setup/tools/filegen/robots.ss.php | 24 +++ .../tools/filegen/templates/robots.txt.in | 13 ++ 6 files changed, 272 insertions(+), 2 deletions(-) create mode 100644 endpoints/sitemap/sitemap.php create mode 100644 includes/components/sitemap.class.php create mode 100644 setup/tools/filegen/robots.ss.php rename robots.txt => setup/tools/filegen/templates/robots.txt.in (50%) diff --git a/endpoints/sitemap/sitemap.php b/endpoints/sitemap/sitemap.php new file mode 100644 index 00000000..09e5ca53 --- /dev/null +++ b/endpoints/sitemap/sitemap.php @@ -0,0 +1,52 @@ + ['filter' => FILTER_VALIDATE_INT, 'options' => ['min_value' => 1]] + ); + + private string $page; + + public function __construct(string $pageParam) + { + $this->page = $pageParam; + + parent::__construct($pageParam); + } + + protected function generate() : void + { + if ($xml = Sitemap::generate($this->page, $this->_get['page'] ?? 1)) + $this->result = $xml; + else if (Sitemap::$maxPage) + (new TemplateResponse($this->page))->generateNotFound(Sitemap::ERR_TITLE, sprintf(Sitemap::ERR_OFFSET, Sitemap::$maxPage)); + else + (new TemplateResponse($this->page))->generateNotFound(Sitemap::ERR_TITLE, Sitemap::ERR_PAGE); + } + + public function getCacheKeyComponents() : array + { + $misc = $this->page . serialize($this->_get['page'] ?? 
1); + + return array( + -1, // DBType + -1, // DBTypeId/category + -1, // staff mask (content does not diff) + md5($misc) // misc + ); + } +} + +?> diff --git a/includes/cfg.class.php b/includes/cfg.class.php index 5a8de443..56b287c8 100644 --- a/includes/cfg.class.php +++ b/includes/cfg.class.php @@ -55,7 +55,7 @@ class Cfg 'profiler_enable' => ['realms', 'realmMenu'], 'battlegroup' => ['realms', 'realmMenu'], 'name_short' => ['searchplugin', 'searchboxBody', 'searchboxScript', 'demo'], - 'site_host' => ['searchplugin', 'searchboxBody', 'searchboxScript', 'demo', 'power'], + 'site_host' => ['searchplugin', 'searchboxBody', 'searchboxScript', 'demo', 'power', 'robots'], 'static_host' => ['searchplugin', 'searchboxBody', 'searchboxScript', 'power'], 'contact_email' => ['globaljs'], 'locales' => ['globaljs'] diff --git a/includes/components/sitemap.class.php b/includes/components/sitemap.class.php new file mode 100644 index 00000000..cdf87c67 --- /dev/null +++ b/includes/components/sitemap.class.php @@ -0,0 +1,181 @@ + [Type::NPC, '::creature', 'IF(x.`cuFlags` & 0x40000000, 0.1, 0.4)'], + 'object' => [Type::OBJECT, '::objects', 'IF(x.`cuFlags` & 0x40000000, 0.1, 0.4)'], + 'item' => [Type::ITEM, '::items', 'IF(x.`cuFlags` & 0x40000000, 0.1, IF(src.`typeId` IS NULL, 0.5, 0.7))'], + 'itemset' => [Type::ITEMSET, '::itemset', 'IF(x.`cuFlags` & 0x40000000, 0.1, 0.7)'], + 'quest' => [Type::QUEST, '::quests', 'IF(x.`cuFlags` & 0x40000000, 0.1, IF(src.`typeId` IS NULL, 0.3, 0.5))'], + 'spell' => [Type::SPELL, '::spell', 'IF(x.`cuFlags` & 0x40000000, 0.1, IF(src.`typeId` IS NULL, 0.5, 0.8))'], + 'zone' => [Type::ZONE, '::zones', 'IF(x.`cuFlags` & 0x40000000, 0.1, 0.4)'], + 'faction' => [Type::FACTION, '::factions', 'IF(x.`cuFlags` & 0x40000000, 0.1, 0.4)'], + 'pet' => [Type::PET, '::pet', 'IF(x.`cuFlags` & 0x40000000, 0.1, 0.4)'], + 'achievement' => [Type::ACHIEVEMENT, '::achievement', 'IF(x.`cuFlags` & 0x40000000, 0.1, IF(x.`category` = 81, 0.6, IF(x.`category` IN (1, 122, 
133, 141, 134, 14807, 131, 130, 128, 132, 21, 124, 135, 126, 154, 125, 140, 145, 147, 136, 127, 152, 153, 191, 123, 14822, 14821, 14823, 137, 178, 173, 14963, 15021, 15062), 0.3, 0.4)))'], + 'title' => [Type::TITLE, '::titles', 'IF(x.`cuFlags` & 0x40000000, 0.1, IF(src.`typeId` IS NULL, 0.3, 0.4))'], + 'event' => [Type::WORLDEVENT, '::events', 'IF(x.`cuFlags` & 0x40000000, 0.1, IF(x.`holidayId` = 0, 0.2, 0.4))'], + 'class' => [Type::CHR_CLASS, '::classes', 'IF(x.`cuFlags` & 0x40000000, 0.1, 0.7)'], + 'race' => [Type::CHR_RACE, '::races', 'IF(x.`cuFlags` & 0x40000000, 0.1, 0.7)'], + 'skill' => [Type::SKILL, '::skillline', 'IF(x.`cuFlags` & 0x40000000, 0.1, IF(x.`typeCat` IN(11, 9), 0.5, IF(x.`typeCat` IN (8, 6), 0.4, 0.3)))'], + 'currency' => [Type::CURRENCY, '::currencies', 'IF(x.`cuFlags` & 0x40000000, 0.1, IF(x.`category` = 3, 0.2, IF(x.`description_loc0`, 0.4, 0.3)))'], + 'sound' => [Type::SOUND, '::sounds', 'IF(x.`cuFlags` & 0x40000000, 0.1, 0.3)'], + 'icons' => [Type::ICON, '::icons', 'IF(x.`cuFlags` & 0x40000000, 0.1, 0.3)'], + 'emote' => [Type::EMOTE, '::emotes', 'IF(x.`cuFlags` & 0x40000000, 0.1, 0.3)'], + 'enchantment' => [Type::ENCHANTMENT, '::itemenchantment', 'IF(x.`cuFlags` & 0x40000000, 0.1, IF(x.`type1` IN (1, 7) OR x.`type2` IN (1, 7) OR x.`type3` IN (1, 7), 0.4, 0.3))'], + 'areatrigger' => [Type::AREATRIGGER, '::areatrigger', 'IF(x.`cuFlags` & 0x40000000, 0.1, 0.3)'], + 'mail' => [Type::MAIL, '::mails', 'IF(x.`cuFlags` & 0x40000000, 0.1, 0.3)'] + // 'guide' => [Type::GUIDE, '::guides', ''] super low prio .. need a way to filter for publicly visible guides + ); + + public static function generate(string $page, int $offset) : ?string + { + self::$page = $page; + self::$offset = $offset; + + if (!self::$page) + return self::getIndex(); + else if (self::$page == 'special') + return self::getSpecial(); + else if (isset(self::$validPages[self::$page][1])) + return self::getPage(); + + // whoops! 
+        return null;
+    }
+
+    /**
+     * Build the sitemap index document.
+     *
+     * Lists the 'special' sitemap plus one entry for every MAX_ENTRIES-sized
+     * page of each table registered in self::$validPages.
+     *
+     * @return string|null serialized XML, or null if serialization failed
+     */
+    private static function getIndex() : ?string
+    {
+        // children are <sitemap> nodes, so the sitemaps.org schema requires a <sitemapindex> root
+        $root = new SimpleXML('<sitemapindex />');
+        $root->addAttribute('xmlns', 'http://www.sitemaps.org/schemas/sitemap/0.9');
+
+        $host = Cfg::get('HOST_URL');
+
+        $root->addChild('sitemap')->addChild('loc', $host.'/?sitemap=special');
+
+        foreach (self::$validPages as $page => [, $table, ])
+        {
+            $n = DB::Aowow()->selectCell('SELECT CEIL(COUNT(*) / %i) FROM %n', self::MAX_ENTRIES, $table);
+
+            // addChild() hands the value to libxml unescaped: a bare '&' raises
+            // "unterminated entity reference" and the node text is lost, so the
+            // ampersand has to be passed as an entity.
+            // NOTE(review): assumes SimpleXML inherits SimpleXMLElement::addChild() unchanged - confirm
+            for ($i = 1; $i <= $n; $i++)
+                $root->addChild('sitemap')->addChild('loc', $host.'/?sitemap='.$page.'&amp;page='.$i);
+        }
+
+        return $root->asXML() ?: null;
+    }
+
+    /**
+     * Build the sitemap for the hand-picked pages (home, tools, maps).
+     *
+     * Only page 1 exists; any other offset sets self::$maxPage to 1 and yields null.
+     *
+     * @return string|null serialized XML, or null on bad offset / failed serialization
+     */
+    private static function getSpecial() : ?string
+    {
+        if (self::$offset != 1)
+        {
+            self::$maxPage = 1;
+            return null;
+        }
+
+        // children are <url> nodes, so the sitemaps.org schema requires a <urlset> root
+        $root = new SimpleXML('<urlset />');
+        $root->addAttribute('xmlns', 'http://www.sitemaps.org/schemas/sitemap/0.9');
+
+        $host = Cfg::get('HOST_URL');
+
+        // [url suffix, priority, changefreq] .. emitted in exactly this order
+        $pages = array(
+            [''         , '1'  , 'monthly'],                // home
+            ['/?talent' , '1'  , 'yearly' ],                // talent calc
+            ['/?petcalc', '0.8', 'yearly' ],                // pet calc
+            ['/?compare', '0.9', 'yearly' ]                 // item compare
+        );
+
+        // profiler is only advertised when enabled
+        if (Cfg::get('PROFILER_ENABLE'))
+            $pages[] = ['/?profiler', '1', 'yearly'];
+
+        $pages[] = ['/?maps', '0.7', 'yearly'];             // maps
+
+        foreach ($pages as [$suffix, $priority, $changefreq])
+        {
+            $url = $root->addChild('url');
+            $url->addChild('loc', $host.$suffix);
+            $url->addChild('priority', $priority);
+            $url->addChild('changefreq', $changefreq);
+        }
+
+        // asXML() yields false on failure; map that to null like getIndex() does
+        return $root->asXML() ?: null;
+
}
+
+    /**
+     * Build one sitemap page for the DB type selected by self::$page.
+     *
+     * Emits up to MAX_ENTRIES <url> nodes starting at page self::$offset (1-based).
+     * Each node carries a priority computed in SQL from the type's priority
+     * expression and a lastmod taken from the newest joined comment, screenshot
+     * or video date, falling back to LASTMOD_BASE when there is none.
+     *
+     * An out-of-range offset stores the page count in self::$maxPage and yields null.
+     *
+     * @return string|null serialized XML, or null on bad offset / failed serialization
+     */
+    private static function getPage() : ?string
+    {
+        [$type, $table, $prioString] = self::$validPages[self::$page];
+
+        $n = DB::Aowow()->selectCell('SELECT CEIL(COUNT(*) / %i) FROM %n', self::MAX_ENTRIES, $table);
+        if (self::$offset <= 0 || self::$offset > $n)
+        {
+            self::$maxPage = $n;
+            return null;
+        }
+
+        // children are <url> nodes, so the sitemaps.org schema requires a <urlset> root
+        $root = new SimpleXML('<urlset />');
+        $root->addAttribute('xmlns', 'http://www.sitemaps.org/schemas/sitemap/0.9');
+
+        // The screenshot/video joins previously tested co.`flags` (the comments alias),
+        // which tied media visibility to whichever comment row happened to be joined.
+        // They now test their own row.
+        // NOTE(review): column name `status` on ::screenshots / ::videos is assumed - confirm against the schema
+        // ORDER BY keeps LIMIT paging stable, so consecutive pages neither overlap nor skip ids.
+        $rows = DB::Aowow()->selectAssoc(
+           'SELECT    x.`id` AS ARRAY_KEY, ('.$prioString.') AS "priority", GREATEST(IFNULL(MAX(ss.`date`), 0), IFNULL(MAX(vi.`date`), 0), IFNULL(MAX(co.`date`), 0)) AS "lastmod" FROM %n x
+            LEFT JOIN ::source      src ON src.`type` = %i AND src.`typeId` = x.`id`
+            LEFT JOIN ::comments    co  ON co.`type`  = %i AND co.`typeId`  = x.`id` AND (co.`flags`  & %i) = 0
+            LEFT JOIN ::screenshots ss  ON ss.`type`  = %i AND ss.`typeId`  = x.`id` AND (ss.`status` & %i) = 0 AND (ss.`status` & %i) > 0
+            LEFT JOIN ::videos      vi  ON vi.`type`  = %i AND vi.`typeId`  = x.`id` AND (vi.`status` & %i) = 0 AND (vi.`status` & %i) > 0
+            GROUP BY  x.`id` ORDER BY x.`id` LIMIT %i, %i',
+            $table,
+            $type,
+            $type, CC_FLAG_DELETED,
+            $type, CC_FLAG_DELETED, CC_FLAG_APPROVED,
+            $type, CC_FLAG_DELETED, CC_FLAG_APPROVED,
+            self::MAX_ENTRIES * (self::$offset - 1), self::MAX_ENTRIES
+        );
+
+        $host = Cfg::get('HOST_URL');
+
+        foreach ($rows as $id => $pair)
+        {
+            $url = $root->addChild('url');
+            $url->addChild('loc', $host.'/?'.self::$page.'='.$id);
+            $url->addChild('priority', $pair['priority']);
+            // lastmod is 0 when nothing was ever attached; date() wants an int timestamp
+            $url->addChild('lastmod', date('c', (int)$pair['lastmod'] ?: self::LASTMOD_BASE));
+        }
+
+        // asXML() yields false on failure; map that to null to honor the declared return type
+        return $root->asXML() ?: null;
+    }
+} + +?> diff --git a/setup/tools/clisetup/siteconfig.us.php b/setup/tools/clisetup/siteconfig.us.php index 80090245..25b8247a 100644 --- a/setup/tools/clisetup/siteconfig.us.php +++ b/setup/tools/clisetup/siteconfig.us.php @@ -406,7 +406,7 @@ CLISetup::registerUtility(new class extends UtilityScript $prot = Cfg::get('FORCE_SSL') ? 
'https://' : 'http://'; $cases = array( - 'site_host' => [$prot, Cfg::get('SITE_HOST'), '/robots.txt'], + 'site_host' => [$prot, Cfg::get('SITE_HOST'), '/index.php'], 'static_host' => [$prot, Cfg::get('STATIC_HOST'), '/css/aowow.css'] ); diff --git a/setup/tools/filegen/robots.ss.php b/setup/tools/filegen/robots.ss.php new file mode 100644 index 00000000..b9f8447c --- /dev/null +++ b/setup/tools/filegen/robots.ss.php @@ -0,0 +1,24 @@ + [[], CLISetup::ARGV_PARAM, 'Fills robots.txt with site variables.'] + ); + + protected $fileTemplateSrc = ['robots.txt.in']; + protected $fileTemplateDest = ['robots.txt']; // aowow root +}); + +?> diff --git a/robots.txt b/setup/tools/filegen/templates/robots.txt.in similarity index 50% rename from robots.txt rename to setup/tools/filegen/templates/robots.txt.in index 053da057..ae268c86 100644 --- a/robots.txt +++ b/setup/tools/filegen/templates/robots.txt.in @@ -8,8 +8,21 @@ Disallow: /?profile=* Disallow: /?profiles Disallow: /profiles/ Disallow: /?profiles=* +Disallow: /?guild +Disallow: /guild/ +Disallow: /?guild=* +Disallow: /?guilds +Disallow: /guilds/ +Disallow: /?guilds=* +Disallow: /?arena-team +Disallow: /arena-team/ +Disallow: /?arena-team=* +Disallow: /?arena-teams +Disallow: /arena-teams/ +Disallow: /?arena-teams=* Disallow: /?random Disallow: /random/ Disallow: /?search Disallow: /search/ Disallow: /?search=* +Sitemap: CFG_HOST_URL/?sitemap