Utilisateur:ZX81-bot/AWB/Modules/Cleanup

Un article de Wikipédia, l'encyclopédie libre.

using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using WikiFunctions;
 
namespace AutoWikiBrowser
{
    class Module1 : WikiFunctions.Plugin.IModule
    {
// à copier/coller dans le module à partir d'ici
 
        /*
         * Module AWB de corrections de typographie et de syntaxe wiki :
         * - nettoyage des bandeaux ;
         * - nettoyage des catégories ;
         * - nettoyage des images ;
         * - nettoyage des modèles ;
         * - nettoyage des liens internes ;
         * - nettoyage clé de tri DEFAULTSORT ;
         * - ajout clé de tri DEFAULTSORT si personnalité ;
         * - ajout du modèle {{langue|ja|...}} sur les kanjis ;
         * - ajout séparateur {{,}} sur références ;
         * - typographie des pourcentages ;
         * - typographie des nombres ({{formatnum:}}) ;
         * - typographie et wikification des siècles ;
         * - typographie et wikification des nièmes.
         *
         * Auteur : [[:fr:User:TiChou]]
         * Date création : juillet 2007
         * Date révision : 18 août 2007
         */
 
        string[] BandeauxUniques;
        int nbBU;
        /// <summary>
        /// Module entry point: runs a fixed sequence of regex clean-up passes over one article
        /// (banners, categories, DEFAULTSORT, images, templates, links, Japanese text,
        /// references, percentages, numbers, centuries and ordinals).
        /// </summary>
        /// <remarks>
        /// The passes are order-dependent: later patterns rely on the normalised output of
        /// earlier ones. After each group of passes the text is compared with its previous
        /// state and, when it changed, a short French tag is appended to <paramref name="Summary"/>.
        /// </remarks>
        /// <param name="ArticleText">Wikitext to process.</param>
        /// <param name="ArticleTitle">Article title; used to build a sort key when no category supplies one.</param>
        /// <param name="wikiNamespace">Not used by this method.</param>
        /// <param name="Summary">Receives the comma-separated tags of the passes that changed the text ("" when none did).</param>
        /// <param name="Skip">Always set to false.</param>
        /// <returns>The processed wikitext.</returns>
        public string ProcessArticle(string ArticleText, string ArticleTitle, int wikiNamespace, out string Summary, out bool Skip)
        {
            Skip = false;
            Summary = "";

            string ArticleTextOld = ArticleText;

            // Reset the per-article list of banners already seen (filled by NettoyageBandeau).
            BandeauxUniques = new string[100];
            nbBU = 0;

            // Replace the various multi-banner templates with {{Multi bandeau}}
            ArticleText = Regex.Replace(ArticleText,
                @"\{\{[ _]*(?:(?::?[ _]*fr[ _]*:[ _]*)?(?:Modèle|Template)[ _]*:[ _]*)?(?:Multi[ _]+bandeaux?|Multi-?bandeau|Multib)[ _]*(\|[^\}]*)?\}\}",
                "{{Multi bandeau$1}}",
                RegexOptions.IgnoreCase);

            // Remove nested multi-banner names and empty parameters inside a {{Multi bandeau}}
            ArticleText = Regex.Replace(ArticleText,
                @"(?<=\{\{Multi bandeau(?:\|[^\|\}]*)*?)\| *(?:(?::? *fr *: *)?(?:Modèle|Template) *: *)?(?:Multi[ _]+bandeaux?|Multi-?bandeau|Multib|) *(?=(?:\|[^\|\}]*)*\}\})",
                "",
                RegexOptions.IgnoreCase);

            // Clean up (multi-)banners and drop duplicates
            ArticleText = Regex.Replace(ArticleText,
                @"(\{\{(?:Multi bandeau\|)?)((?<=\{\{Multi bandeau\|)[^\}]+|(?<=\{\{) *(?:(?::? *fr *: *)?(?:Modèle|Template) *: *)?(?:Ébauche(?:s?|[ _]+[^\|\}]+)|Portail[ _]+[^\|\}]+))(\}\}(?s:\s*))",
                new MatchEvaluator(NettoyageBandeau),
                RegexOptions.IgnoreCase);

            // Group consecutive stub banners into a single {{Multi bandeau}}
            ArticleText = Regex.Replace(ArticleText,
                @"(?:\{\{(?:Multi bandeau\|)?((?<=\{\{Multi bandeau\|)(?:[^\}]+\|)?Ébauche[s ][^\}]+|Ébauche(?:s?| [^\|\}]+))\}\}(\s*)){2,}",
                new MatchEvaluator(MultiBandeau),
                RegexOptions.Singleline);

            // Group consecutive portal banners into a single {{Multi bandeau}}
            ArticleText = Regex.Replace(ArticleText,
                @"(?:\{\{(?:Multi bandeau\|)?((?<=\{\{Multi bandeau\|)(?:[^\}]+\|)?Portail [^\}]+|Portail [^\|\}]+)\}\}(\s*)){2,}",
                new MatchEvaluator(MultiBandeau),
                RegexOptions.Singleline);

            // Remove {{Multi bandeau}} wrappers that are empty or hold a single banner
            ArticleText = Regex.Replace(ArticleText,
                @"\{\{Multi bandeau\|?\}\}\s*|(?<=\{\{)Multi bandeau\|(?=[^\|\}]+\}\})",
                "");

            // Normalise the line breaks between the banners and the categories
            ArticleText = Regex.Replace(ArticleText,
                @"(?<=\{\{(?:Multi bandeau\|[^\}]+|Portail [^\|\}]+)\}\})\s*(?=(?:\{\{[ _]*DEFAULTSORT[ _]*:[^\}]+\}\}\s*)?\[\[ *Catégorie *:[^\]]+\]\].*)",
                "\r\n\r\n",
                RegexOptions.Singleline);
            if (ArticleText != ArticleTextOld)
            {
                Summary += "net. bandeaux, ";
                ArticleTextOld = ArticleText;
            }

            // Category clean-up: normalise the namespace prefix
            ArticleText = Regex.Replace(ArticleText,
                @"(?<=\[\[)[ _]*(?:(:)[ _]*)?" +
                @"(?:" + Variables.LangCode.ToString().ToLower() + @"[ _]*:[ _]*)?" +
                @"(?:Category|" + Variables.Namespaces[14].Replace(":", "") + @")[ _]*:[ _]*" +
                @"(?=[^\]]+\]\])",
                "$1" + Variables.Namespaces[14], RegexOptions.IgnoreCase);
            // Upper-case the first letter of category names
            ArticleText = Regex.Replace(ArticleText,
                @"(?<=\[\[" +
                Variables.Namespaces[14] +
                @")\p{Ll}",
                new MatchEvaluator(UpFirstChar));
            if (ArticleText != ArticleTextOld)
            {
                Summary += "net. catégories, ";
                ArticleTextOld = ArticleText;
            }

            // Add a DEFAULTSORT when the article has none and carries a person-related category
            Match match;
            string key;
            if (!Regex.IsMatch(ArticleText, @"\{\{[ _]*DEFAULTSORT[ _]*[:\|][^\}]*\}\}") &&
                Regex.IsMatch(ArticleText, @"\[\[" + Variables.Namespaces[14] + @"(?:Naissance|Décès|Pseudonyme|Personnalité)"))
            {
                // Reuse the first explicit category sort key if there is one, else derive one from the title
                match = Regex.Match(ArticleText, @"\[\[Catégorie:[^\|\]]+\|((?:[^,\]]+(?=,)|[^\]]{2,})(?:, *[^\]]+)?)\]\]");
                if (!match.Success)
                {
                    key = CleanKey(Tools.MakeHumanCatKey(ArticleTitle));
                }
                else
                {
                    key = CleanKey(match.Groups[1].Value);
                }

                // Insert the DEFAULTSORT just before the first category.
                // '$' is doubled so that a key containing it is inserted literally instead of
                // being read as a substitution token of the replacement pattern.
                ArticleText = Regex.Replace(ArticleText,
                    @"(\[\[Catégorie:[^\]]+\]\].*)",
                    "{{DEFAULTSORT:" + key.Replace("$", "$$") + "}}\n$1",
                    RegexOptions.Singleline);

                // Drop the per-category sort keys (two characters or more)
                ArticleText = Regex.Replace(ArticleText,
                    @"(?<=\[\[Catégorie:[^\|\]]+)\|[^\]]{2,}(?=\]\])",
                    "");
                if (ArticleText != ArticleTextOld)
                {
                    Summary += "ajout defaultsort, ";
                    ArticleTextOld = ArticleText;
                }
            }
            // Otherwise clean up the existing DEFAULTSORT
            else
            {
                ArticleText = Regex.Replace(ArticleText,
                    @"(?<=\{\{)[ _]*DEFAULTSORT[ _]*[:\|][ _]*([^\}]*?)[ _]*(?=\}\})",
                    new MatchEvaluator(Key),
                    RegexOptions.IgnoreCase);
                ArticleText = Regex.Replace(ArticleText,
                    @"(?<=\{\{DEFAULTSORT:[^\}]*\}\})\s*(?=\[\[" +
                    Variables.Namespaces[14] +
                    @")",
                    "\r\n",
                    RegexOptions.Singleline);
                if (ArticleText != ArticleTextOld)
                {
                    Summary += "net. defaultsort, ";
                    ArticleTextOld = ArticleText;
                }
            }

            // Image clean-up: normalise the namespace prefix.
            // The English alias matched here is "Image"; the previous "Category" alternative was a
            // copy of the category pass and rewrote leftover [[Category:...]] links as images.
            ArticleText = Regex.Replace(ArticleText,
                @"(?<=\[\[)[ _]*(?:(:)[ _]*)?" +
                @"(?:" + Variables.LangCode.ToString().ToLower() + @"[ _]*:[ _]*)?" +
                @"(?:Image|" + Variables.Namespaces[6].Replace(":", "") + @")[ _]*:[ _]*",
                "$1" + Variables.Namespaces[6], RegexOptions.IgnoreCase);
            // Upper-case the first letter of image names
            ArticleText = Regex.Replace(ArticleText,
                @"(?<=\[\[" +
                Variables.Namespaces[6] +
                @")\p{Ll}",
                new MatchEvaluator(UpFirstChar));
            if (ArticleText != ArticleTextOld)
            {
                Summary += "net. images, ";
                ArticleTextOld = ArticleText;
            }

            // Template clean-up: strip the namespace prefix inside {{...}}
            ArticleText = Regex.Replace(ArticleText,
                @"(?<=\{\{)[ _]*" +
                @"(?::?[ _]*" + Variables.LangCode.ToString().ToLower() + @"[ _]*:[ _]*)?" +
                @"(?:Template|" + Variables.Namespaces[10].Replace(":", "") + @")[ _]*:[ _]*" +
                @"(?=[^\}]+\}\})",
                "",
                RegexOptions.IgnoreCase);
            if (ArticleText != ArticleTextOld)
            {
                Summary += "net. modèles, ";
                ArticleTextOld = ArticleText;
            }

            // Link clean-up: strip leading blanks and the local language prefix inside [[...]]
            ArticleText = Regex.Replace(ArticleText,
                @"(?<=\[\[)[ _]*" +
                @"(?::?[ _]*" + Variables.LangCode.ToString().ToLower() + @"[ _]*:[ _]*)?" +
                @"(?=[^\]]+\]\])",
                "",
                RegexOptions.IgnoreCase);
            if (ArticleText != ArticleTextOld)
            {
                Summary += "net. liens, ";
                ArticleTextOld = ArticleText;
            }

            // Only tag CJK runs as Japanese when the article mentions "Japon" and is not flagged as Chinese
            if (!Regex.IsMatch(ArticleText, @"\{\{Unicode ?chinois\}\}|\{\{(?:Multi bandeau\s*\|[^\}]*)?Portail Chine(?:\|[^\}]*)?\}\}", RegexOptions.IgnoreCase) &&
                Regex.IsMatch(ArticleText, @"Japon", RegexOptions.IgnoreCase))
            {
                // Wrap kana/kanji runs in {{langue|ja|...}}; the look-behinds skip nowiki/pre/math/comment/timeline
                // sections, link targets, categories, template names, DEFAULTSORT, language templates and external links
                ArticleText = Regex.Replace(ArticleText,
                    @"(?<!<nowiki>(?s:.(?<!</nowiki>))*)(?<!<pre>(?s:.(?<!</pre>))*)(?<!<math>(?s:.(?<!</math>))*)(?<!<!--(?s:.(?<!-->))*)(?<!<timeline>(?s:.(?<!</timeline>))*)(?<!\[\[(?:.(?<!\]\]|\|))*)(?<!\[\[(?:Catégorie|Category)\s*:(?:.(?<!\]\]))*)(?<!\{\{(?:.(?<!\}\}|\|))*)(?<!\{\{DEFAULTSORT\s*:(?:.(?<!\}\}))*)(?<!\{\{(?:langue|lang|Langue avec nom|Japonais|Nihongo|nji|Nom japonais inversé)\s*\|(?:.(?<!\}\}))*)(?<!\[(?:http|https|ftp|mailto):(?:.(?<!\]\]|\s))*)(([\p{IsKatakanaPhoneticExtensions}\p{IsEnclosedCJKLettersandMonths}\p{IsHiragana}\p{IsKatakana}\p{IsCJKUnifiedIdeographs}\p{IsCJKSymbolsandPunctuation}]+\s*)+(?<!\s))",
                    "{{langue|ja|$1}}",
                    RegexOptions.IgnoreCase);
                if (ArticleText != ArticleTextOld)
                {
                    Summary += "ajout {{langue|ja|...}}, ";
                    ArticleTextOld = ArticleText;
                }
            }

            // Insert the {{,}} separator between adjacent <ref> tags
            ArticleText = Regex.Replace(ArticleText,
                @"(?<=<\/ref>)\s*,?\s*(?=<ref)",
                "{{,}}",
                RegexOptions.IgnoreCase);
            if (ArticleText != ArticleTextOld)
            {
                Summary += "ajout sép. <ref>, ";
                ArticleTextOld = ArticleText;
            }

            // &#160; -> &nbsp;
            ArticleText = Regex.Replace(ArticleText,
                @"&#160;",
                "&nbsp;");

            // Percentage typography: integer part followed by optional blank/entity and %
            ArticleText = Regex.Replace(ArticleText,
                @"\b(?<!style=""[^""]+:\s*|(?:width|height)\s*=\s*""?|https?://\S*|\d+[,\.])(\d+)(?:\s*|&(?:nb|thin)sp;)%",
                "$1 %",
                RegexOptions.IgnoreCase);
            // Percentage typography: decimal numbers, decimal separator rewritten as a comma
            ArticleText = Regex.Replace(ArticleText,
                @"\b(?<!style=""[^""]+:\s*|(?:width|height)\s*=\s*""?|https?://\S*)(\d+)[,\.](\d+)(?:\s*|&(?:nb|thin)sp;)%",
                "$1,$2 %",
                RegexOptions.IgnoreCase);
            if (ArticleText != ArticleTextOld)
            {
                Summary += "typo %, ";
                ArticleTextOld = ArticleText;
            }


            // NOTE(review): several character classes below ([  ], [,  ]) contain a plain space followed by
            // a second, invisible whitespace character - presumably a no-break space; verify before editing them.

            // 1234.56 -> {{formatnum:1234.56}}
            ArticleText = Regex.Replace(ArticleText,
                @"\b(?<!style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)[1-9]\d{3,}\.\d+",
                "{{formatnum:$&}}");

            // 12.3456 -> 12,3456 (only the decimal point is replaced; no formatnum is added here)
            ArticleText = Regex.Replace(ArticleText,
                @"(?<=\b(?<!\{\{Coor (?:.(?<!\}\}))*|style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)[1-9]\d?)\.(?=\d{4,})",
                ",",
                RegexOptions.IgnoreCase);

            // 1234,56 -> {{formatnum:1234.56}}
            ArticleText = Regex.Replace(ArticleText,
                @"\b(?<!style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)[1-9]\d{3,},\d{1,2}",
                new MatchEvaluator(formatnum2));

            // 1234,5678 -> {{formatnum:1234.5678}}
            ArticleText = Regex.Replace(ArticleText,
                @"\b(?<!style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)[1-9]\d{3,},\d{4,}",
                new MatchEvaluator(formatnum2));

            // 1 234.56 -> {{formatnum:1234.56}}
            // 1 234 -> {{formatnum:1234}}
            ArticleText = Regex.Replace(ArticleText,
                @"\b(?<!style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*|IS[BS]N(?:-1[03] )?[ :=\|]?[-\d]*)[1-9]\d{0,2}[  ](?:\d{3}[  ])*\d{3}(?:\.\d+)?(?![,  ]?\d)",
                new MatchEvaluator(formatnum1));

            // 1&nbsp;234.56 -> {{formatnum:1234.56}}
            // 1&nbsp;234 -> {{formatnum:1234}}
            ArticleText = Regex.Replace(ArticleText,
                @"\b(?<!style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)[1-9]\d{0,2}&(?:nb|thin)sp;(?:\d{3}&(?:nb|thin)sp;)*\d{3}(?:\.\d+)?(?!(?:,|&(?:nb|thin)sp;)?\d)",
                new MatchEvaluator(formatnum1));

            // 1,234.56 -> {{formatnum:1234.56}}
            // 1,234 -> {{formatnum:1234}}
            ArticleText = Regex.Replace(ArticleText,
                @"\b(?<!style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)[1-9]\d{0,2},(?:\d{3},)*\d{3}(?:\.\d+)?(?=\D)",
                new MatchEvaluator(formatnum1));

            // 1 234,56 -> {{formatnum:1234.56}}
            ArticleText = Regex.Replace(ArticleText,
                @"\b(?<!style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*|IS[BS]N(?:-1[03] )?[ :=\|]?[-\d]*)[1-9]\d{0,2}[  ](?:\d{3}[  ])*\d{3}(?:,\d+)?(?![\. ]?\d)",
                new MatchEvaluator(formatnum2));

            // 1&nbsp;234,56 -> {{formatnum:1234.56}}
            ArticleText = Regex.Replace(ArticleText,
                @"\b(?<!style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)[1-9]\d{0,2}&(?:nb|thin)sp;(?:\d{3}&(?:nb|thin)sp;)*\d{3}(?:,\d+)?(?!(?:\.|&(?:nb|thin)sp;)?\d)",
                new MatchEvaluator(formatnum2));

            // 1.234,56 -> {{formatnum:1234.56}}
            // 1.234 -> {{formatnum:1234}}
            ArticleText = Regex.Replace(ArticleText,
                @"\b(?<!style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)[1-9]\d{0,2}\.(?:\d{3}\.)*\d{3}(?:,\d+)?(?=\D)",
                new MatchEvaluator(formatnum2));

            // 12.3 -> 12,3
            ArticleText = Regex.Replace(ArticleText,
                @"(?<=\b(?<!\{\{Coor (?:.(?<!\}\}))*|style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)(?:[1-9]\d+|\d))\.(?=\d+)",
                ",",
                RegexOptions.IgnoreCase);

            // 12345 -> {{formatnum:12345}}
            ArticleText = Regex.Replace(ArticleText,
                @"\b(?<!(?s:\{\{(?:Coor |Vue satellite WikiMapia|Traduc|OCLC)(?:.(?<!\}\}))*)|style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*|IS[BS]N(?:-1[03] )?[ :=\|]?[-\d]*|(?:old)?id=|#|\d[\.,])[1-9]\d{4,}",
                "{{formatnum:$&}}",
                RegexOptions.IgnoreCase);

            // Four-digit numbers: the three patterns below cover 3000-9999, 2100-2999 and 2020-2099,
            // so 1000-2019 (most likely years) are left untouched.
            // 3456 -> {{formatnum:3456}}
            ArticleText = Regex.Replace(ArticleText,
                @"(?<!(?s:\{\{(?:Coor |Vue satellite WikiMapia|Traduc|OCLC)(?:.(?<!\}\}))*)|style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*|IS[BS]N(?:-1[03] )?[ :=\|]?[-\d]*|(?:old)?id=|#|\d[\.,])\b[3-9]\d{3}\b",
                "{{formatnum:$&}}",
                RegexOptions.IgnoreCase);
            // 2233 -> {{formatnum:2233}}
            ArticleText = Regex.Replace(ArticleText,
                @"(?<!(?s:\{\{(?:Coor |Vue satellite WikiMapia|Traduc|OCLC)(?:.(?<!\}\}))*)|style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*|IS[BS]N(?:-1[03] )?[ :=\|]?[-\d]*|(?:old)?id=|#|\d[\.,])\b2[1-9]\d{2}\b",
                "{{formatnum:$&}}",
                RegexOptions.IgnoreCase);
            // 2023 -> {{formatnum:2023}}
            ArticleText = Regex.Replace(ArticleText,
                @"(?<!(?s:\{\{(?:Coor |Vue satellite WikiMapia|Traduc|OCLC)(?:.(?<!\}\}))*)|style=""[^""]*|https?://\S*|\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*|IS[BS]N(?:-1[03] )?[ :=\|]?[-\d]*|(?:old)?id=|#|\d[\.,])\b20[2-9]\d\b",
                "{{formatnum:$&}}",
                RegexOptions.IgnoreCase);

            // - {{formatnum:1234}} -> {{formatnum:-1234}}
            // NOTE(review): the leading \b only matches when the sign directly follows a word character
            // (e.g. "abc-{{formatnum:..."), not after a blank or at line start as the example suggests.
            // Left unchanged on purpose; confirm the intent before relaxing it.
            ArticleText = Regex.Replace(ArticleText,
                @"\b([-\+])(?:[  ]|&(?:nb|thin)sp;)*\{\{formatnum:([\d\.]+)\}\}",
                "{{formatnum:$1$2}}");

            if (ArticleText != ArticleTextOld)
            {
                Summary += "typo nombre, ";
                ArticleTextOld = ArticleText;
            }


            // {{XVIIe siècle|something}} -> {{XVIIe siècle}}
            ArticleText = Regex.Replace(ArticleText,
                @"\{\{[ _]*([XVI]+er?)[ _]+siècle[ _]*( av\. J\.-C\.)?[ _]*\|[^\}]*\}\}",
                "{{$1 siècle$2}}");

            // [[XVIIe siècle]] -> {{XVIIe siècle}}
            ArticleText = Regex.Replace(ArticleText,
                @"\[\[[ _]*([XVI]+er?)[ _]+siècle[ _]*\]\]",
                "{{$1 siècle}}");

            // [[XVIIe siècle|XVII{{e}} siècle]] -> {{XVIIe siècle}}
            ArticleText = Regex.Replace(ArticleText,
                @"\[\[[ _]*([XVI]+er?)[ _]+siècle[ _]*\| *[XVI]+ *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me\b|[éeè]\b|°)(?:\}\}|</sup>)? +[Ss](?:i[éeè]cle|\.) *\]\]",
                "{{$1 siècle}}");

            // [[IIe siècle av. J.-C.]] -> {{IIe siècle av. J.-C.}}
            ArticleText = Regex.Replace(ArticleText,
                @"\[\[[ _]*([CLXVI]+er?)[ _]+siècle[ _]+av\.[ _]+J\.-C\.[ _]*\]\]",
                "{{$1 siècle av. J.-C.}}");

            // [[IIe siècle av. J.-C.|II{{e}} siècle av. J.-C.]] -> {{IIe siècle av. J.-C.}}
            // (the label part expects "siècle" once; it was previously required twice, so this never matched)
            ArticleText = Regex.Replace(ArticleText,
                @"\[\[[ _]*([XVI]+er?)[ _]+siècle[ _]+av\.[ _]+J\.-C\.[ _]*\| *[XVI]+ *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me\b|[éeè]\b|°)(?:\}\}|</sup>)? +[Ss](?:i[éeè]cle|\.) +[aA]v(?:ant|\.) +J(?:ésus|\.)?[- ]?C(?:hrist|\.)? *\]\]",
                "{{$1 siècle av. J.-C.}}");

            // [[Au XVIIe siècle et après]] -> [[Au XVIIe siècle et après|Au {{s-|XVII|e}} et après]]
            ArticleText = Regex.Replace(ArticleText,
                @"(?<=\[\[(?!Catégorie:)):?([^\|\]]*)\b([XVI]+)(er?) +siècle\b([^\|]*?)(?=\]\])",
                "$&|$1{{s-|$2|$3}}$4");

            // [[XVIIe siècle|XVII{{e}}]] -> {{s mini|XVII|e}}
            ArticleText = Regex.Replace(ArticleText,
                @"\[\[[ _]*([XVI]+)(er?)[ _]+siècle[ _]*\| *[XVI]+ *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me\b|[éeè]\b|°)(?:\}\}|</sup>)? *\]\]",
                "{{s mini|$1|$2}}");

            // [[IIe siècle av. J.-C.|II{{e}}]] -> {{-s mini|II|e}}
            ArticleText = Regex.Replace(ArticleText,
                @"\[\[[ _]*([XVI]+)(er?)[ _]+siècle[ _]+av\.[ _]+J\.-C\.[ _]*\| *[XVI]+ *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me\b|[éeè]\b|°)(?:\}\}|</sup>)? *\]\]",
                "{{-s mini|$1|$2}}");

            if (ArticleText != ArticleTextOld)
            {
                Summary += "wikif lien siècle, ";
                ArticleTextOld = ArticleText;
            }


            // Ier siècle av. J.-C. -> {{-s-|I|er}}
            ArticleText = Regex.Replace(ArticleText,
                @"(?<!\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)(?:\bI|\b1|\{\{I\}\}) *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me|[éeè°]|er)(?:\}\}|</sup>)? +[Ss](?:i[éeè]cle|\.) +[aA]v(?:ant|\.) +J(?:ésus|\.)?[- ]?C(?:hrist\b|\.|\b)",
                "{{-s-|I|er}}");

            // 3e siècle av. J.-C. -> {{-s-|III|e}} (the roman numeral is produced by a substituted template)
            ArticleText = Regex.Replace(ArticleText,
                @"(?<!\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)\b([12]?\d) *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me|[éeè°])(?:\}\}|</sup>)? +[Ss](?:i[éeè]cle|\.) +[aA]v(?:ant|\.) +J(?:ésus|\.)?[- ]?C(?:hrist\b|\.|\b)",
                "{{-s-|{{subst:Nombre en romain|$1|subst=subst:}}|e}}");

            // VIe siècle av. J.-C. -> {{-s-|VI|e}}
            ArticleText = Regex.Replace(ArticleText,
                @"(?<!\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)(?:\b([CLXVI]+)|\{\{([CLXVI]+)\}\}) *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me|[éeè°])(?:\}\}|</sup>)? +[Ss](?:i[éeè]cle|\.) +[aA]v(?:ant|\.) +J(?:ésus|\.)?[- ]?C(?:hrist\b|\.|\b)",
                "{{-s-|$1$2|e}}");

            // {{s-|III|e}} av. J.-C. -> {{-s-|III|e}}
            ArticleText = Regex.Replace(ArticleText,
                @"\{\{s(-)?\|([CLXVI]+\|er?)\}\}s? +[aA]v(?:ant|\.) +J(?:ésus|\.)?[- ]?C(?:hrist\b|\.|\b)",
                "{{-s$1|$2}}");

            // Ier siècle -> {{s-|I|er}}
            ArticleText = Regex.Replace(ArticleText,
                @"(?<!\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)(?:\bI|\b1|\{\{I\}\}) *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me|[éeè°]|er)(?:\}\}|</sup>)? +[Ss](?:i[éeè]cle\b|\.)",
                "{{s-|I|er}}");

            // 16{{e}} et 17{{e}} siècles -> {{sp-|XVI|e|et|XVII|e|s}}
            ArticleText = Regex.Replace(ArticleText,
                @"(?<!\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)\b([12]?\d) *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me|[éeè°])(?:\}\}|</sup>)?\s*(et(?: le| du)?|au|-|/|ou)\s*([12]?\d) *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me|[éeè°])(?:\}\}|</sup>)? +[Ss](?:i[éeè]cles?\b|\.)",
                "{{sp-|{{subst:Nombre en romain|$1|subst=subst:}}|e|$2|{{subst:Nombre en romain|$3|subst=subst:}}|e|s}}",
                RegexOptions.IgnoreCase);

            // 3e siècle -> {{s-|III|e}}
            ArticleText = Regex.Replace(ArticleText,
                @"(?<!\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)\b([12]?\d) *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me|[éeè°])(?:\}\}|</sup>)? +[Ss](?:i[éeè]cle\b|\.)",
                "{{s-|{{subst:Nombre en romain|$1|subst=subst:}}|e}}");

            // VIe siècle -> {{s-|VI|e}}
            ArticleText = Regex.Replace(ArticleText,
                @"(?<!\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)(?:\b([XVI]+)|\{\{([XVI]+)\}\}) *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me|[éeè°])(?:\}\}|</sup>)? +[Ss](?:i[éeè]cle\b|\.)",
                "{{s-|$1$2|e}}");

            // VIe -> {{s mini-|VI|e}} (skipped before Reich, République, arrondissement, congrès, édition)
            ArticleText = Regex.Replace(ArticleText,
                @"(?<!\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)(?:\b([XVI]+)|\{\{([XVI]+)\}\}) *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me\b|[éeè]\b|°)(?:\}\}|</sup>)?(?!(?:\}\}|</sup>)?\s+(?:Reich|République|arrondissement|[cC]ongrès|[éÉ]dition))",
                "{{s mini-|$1$2|e}}");

            // Ier -> {{s mini-|I|er}} (skipped after the listed first names, i.e. regnal numbers such as "Napoléon Ier")
            ArticleText = Regex.Replace(ArticleText,
                @"(?<!\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)(?<!(?:François|Henri|Napoléon|Louis|Gabriel|Arthur|Ferdinand|Nicolas|Georges?|Alexandre|Charles|Frédéric|Richard)\s+)\bI *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me\b|[éeè]\b|°|er\b)(?:\}\}|</sup>)?(?!(?:\}\}|</sup>)?\s+(?:Reich|arrondissement|[cC]ongrès))",
                "{{s mini-|I|er}}");

            // {{-s mini-|VI|e}} et {{-s-|VII|e}} -> {{-sp-|VI|e|et|VII|e|s}}
            ArticleText = Regex.Replace(ArticleText,
                @"\{\{(?:-?s mini(-)?\|([XVI]+)\|(er?)|([XVI]+)(er?))\}\}\s*(et|au|-|/)\s*\{\{-s(?: mini)?-?\|([XVI]+\|er?)\}\}(?:\s+s(?:i[éeè]cles?|\.) +[aA]v(?:ant|\.) +J(?:ésus|\.)?[- ]?C(?:hrist\b|\.|\b)|s)?",
                "{{-sp$1|$2$4|$3$5|$6|$7|s}}",
                RegexOptions.IgnoreCase);

            // {{-s mini|III|e}} et {{Ier siècle av. J.-C.}} -> {{-sp|III|e|et|I|er|s}}
            // {{VIe s}} et {{IIe siècle av. J.-C.}}s -> {{-sp|VI|e|et|II|e|s}}
            ArticleText = Regex.Replace(ArticleText,
                @"\{\{(?:-?s[ _]+mini\|([XVI]+)\|(er?)|([XVI]+)(er?) s(?:iècle)?)\}\}\s+(et|au|-|/)\s+\{\{([XVI]+)(er?)[ _]+siècle[ _]+[aA]v\.[ _]+J\.-C\.\}\}",
                "{{-sp|$1$3|$2$4|$5|$6|$7|s}}",
                RegexOptions.IgnoreCase);

            // {{s mini-|VI|e}} et {{s-|VII|e}} -> {{sp-|VI|e|et|VII|e|s}}
            ArticleText = Regex.Replace(ArticleText,
                @"\{\{(?:s mini(-)?\|([XVI]+)\|(er?)|([XVI]+)(er?))\}\}\s*(et|au|-|/)\s*\{\{s(?: mini)?-?\|([XVI]+\|er?)\}\}(?:\s+si[éeè]cles?|s)?",
                "{{sp$1|$2$4|$3$5|$6|$7|s}}",
                RegexOptions.IgnoreCase);

            // {{s mini|XII|e}} et {{XIIIe siècle}} -> {{sp|XII|e|et|XIII|e|s}}
            // {{XVIe s}} et {{XVIIe siècle}}s -> {{sp|XVI|e|et|XVII|e|s}}
            ArticleText = Regex.Replace(ArticleText,
                @"\{\{(?:s[ _]+mini\|([XVI]+)\|(er?)|([XVI]+)(er?) s(?:iècle)?)\}\}\s+(et|au|-|/)\s+\{\{([XVI]+)(er?)[ _]+siècle\}\}s?",
                "{{sp|$1$3|$2$4|$5|$6|$7|s}}",
                RegexOptions.IgnoreCase);

            if (ArticleText != ArticleTextOld)
            {
                Summary += "wikif siècle, ";
                ArticleTextOld = ArticleText;
            }

            // [[Machin IIe truc]] -> [[Machin IIe truc|Machin II{{e}} truc]]
            ArticleText = Regex.Replace(ArticleText,
                @"(?<=\[\[(?!Catégorie:)):?([^\|\]]*\b(?:[XVI]+|\d+))(er?|re|nd)\b([^\|]*?)(?=\]\])",
                "$&|$1{{$2}}$3");
            // 1re -> 1{{re}}
            ArticleText = Regex.Replace(ArticleText,
                @"(?<!\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)(?<=\b1) *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?[éeè]?re(s)?\b(?:\}\}|</sup>)?",
                "{{re$1}}");
            // 1er -> 1{{er}}
            ArticleText = Regex.Replace(ArticleText,
                @"(?<!\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)(?<=\b1) *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me\b|[éeè]\b|°(?! *[CF]\b| *\d+['′])|er)(?:\}\}|</sup>)?",
                "{{er}}");
            // 2nd -> 2{{nd}}
            ArticleText = Regex.Replace(ArticleText,
                @"(?<!\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)(?<=\b2) *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:nd|n?d(es?))(?:\}\}|</sup>)?",
                "{{nd$1}}");
            // 3e -> 3{{e}} (a degree sign followed by C/F or by minutes is left alone)
            ArticleText = Regex.Replace(ArticleText,
                @"(?<!\[\[(?:.(?<!\]\]|\|))*|\[\[Catégorie\s*:(?:.(?<!\]\]))*|\{\{(?:.(?<!\}\}|\|))*)(?<=\b\d+) *(?:\{\{(?:[Ee]xp\|)?|<sup(?: +class=""exposant"")?>)?(?:i?[éeè]me\b|[éeè]\b|°(?! *[CF]\b| *\d+['′]))(?:\}\}|</sup>)?",
                "{{e}}");
            if (ArticleText != ArticleTextOld)
            {
                Summary += "wikif nième, ";
                ArticleTextOld = ArticleText;
            }


            // Strip the trailing ", " left by the last appended tag
            Summary = Summary.Trim(", ".ToCharArray());
            return ArticleText;
        }
 
        // fonctions bandeaux
        /// <summary>
        /// Match evaluator for the banner clean-up pass: normalises each banner name found in
        /// group 2 (pipe-separated) and drops stub/portal banners already seen in this article.
        /// </summary>
        /// <param name="m">Match whose group 1 is the opening part, group 2 the banner list and group 3 the closing part.</param>
        /// <returns>
        /// Group 1 + the cleaned, pipe-joined banners + group 3, or an empty string when every
        /// banner of the match was a duplicate.
        /// </returns>
        private string NettoyageBandeau(Match m)
        {
            string bandeaux = "";
            foreach (string b in m.Groups[2].Value.Split(new Char[] { '|' }))
            {
                // Normalise the name: trim, underscores to spaces, collapse blanks,
                // strip an optional "fr:Modèle:" / "Template:" prefix, capitalise the first letter.
                string bandeau = b.Trim();
                bandeau = bandeau.Replace("_", " ");
                bandeau = Regex.Replace(bandeau, " +", " ");
                bandeau = Regex.Replace(bandeau, @"^(?:(?::? *fr *: *)?(?:Modèle|Template) *: *)?", "");
                bandeau = Tools.TurnFirstToUpper(bandeau);

                // Only stub ("Ébauche") and portal ("Portail") banners are de-duplicated;
                // anything else is kept as is.
                if (Regex.IsMatch(bandeau, @"^(?:Ébauche(?:s?| .+)|Portail .+)$", RegexOptions.IgnoreCase))
                {
                    if (Array.IndexOf(BandeauxUniques, bandeau) != -1)
                    {
                        continue;
                    }

                    // Grow the list instead of overflowing it when an article carries
                    // more distinct banners than the array was created for.
                    if (nbBU >= BandeauxUniques.Length)
                    {
                        Array.Resize(ref BandeauxUniques, BandeauxUniques.Length + 100);
                    }
                    BandeauxUniques[nbBU] = bandeau;
                    nbBU++;
                }

                bandeaux += bandeau + "|";
            }

            if (bandeaux == String.Empty)
            {
                return String.Empty;
            }

            return m.Groups[1].Value + bandeaux.Trim(new Char[] { '|' }) + m.Groups[3].Value;
        }
        /// <summary>
        /// Match evaluator that merges a run of banners into one {{Multi bandeau}} template.
        /// </summary>
        /// <param name="m">Match whose group 1 captures each banner name and group 2 the whitespace after each banner.</param>
        /// <returns>"{{Multi bandeau|a|b|...}}" followed by the whitespace captured after the last banner.</returns>
        private string MultiBandeau(Match m)
        {
            CaptureCollection names = m.Groups[1].Captures;

            StringBuilder merged = new StringBuilder("{{Multi bandeau");
            for (int i = 0; i < names.Count; i++)
            {
                merged.Append("|");
                merged.Append(names[i].Value);
            }
            merged.Append("}}");

            // Keep only the trailing whitespace of the last banner in the run.
            CaptureCollection gaps = m.Groups[2].Captures;
            merged.Append(gaps[gaps.Count - 1]);

            return merged.ToString();
        }
 
        // fonctions catégories et defaultsort
        /// <summary>
        /// Match evaluator that upper-cases the matched text (used on the first letter of
        /// category and image names).
        /// </summary>
        /// <param name="m">Match whose value is to be upper-cased.</param>
        /// <returns>The matched text in upper case.</returns>
        private string UpFirstChar(Match m)
        {
            // Invariant casing: the result must not depend on the culture of the machine
            // running the bot (e.g. 'i' would become a dotted capital I under a Turkish locale).
            return m.Value.ToUpperInvariant();
        }
        // Transliteration table, one replacement character per row:
        // the first character is the replacement, the remaining ones are the characters it replaces.
        // U+0149 is written as an escape so it cannot be mistaken for an apostrophe followed by 'n'.
        private static readonly string[] CleanKeySingleCharRows = new string[] {
            "0⅛¼⅜⅝⅞½¾٠۰", "1١۱", "2٢۲", "3٣۳", "4٤۴", "5٥۵", "6٦۶", "7٧۷", "8٨۸", "9٩۹",
            "aáàâãäåăąā", "AÁÀÂÃÄÅĂĄĀ", "cçćĉčċ", "CÇĆĈČĊ", "dðďđ", "DÐĎĐ",
            "eéèêëěėęē", "EÉÈÊËĚĖĘĒ", "gğĝġģ", "GĞĜĠĢ", "hĥħ", "HĤĦ",
            "iíìîïıĩįī", "IÍÌÎÏİĨĮĪ", "jĵ", "JĴ", "kĸķ", "KĶ", "lĺľłļŀ", "LĹĽŁĻĿ",
            "nñ\u0149ńňņ", "NÑŃŇŅ", "oóòôõöøőō", "OÓÒÔÕÖØŐŌ", "rŕřŗ", "RŔŘŖ",
            "sśŝšş", "SŚŜŠŞ", "tťŧţ", "TŤŦŢ", "uúùûüŭůűũųū", "UÚÙÛÜŬŮŰŨŲŪ",
            "wŵ", "WŴ", "yýÿŷ", "YÝŶŸ", "zźžż", "ZŹŽŻ" };

        // Transliteration table, two replacement characters per row:
        // the first two characters are the replacement, the remaining ones are the characters it replaces.
        // The ij ligatures are written as escapes (U+0133, U+0132): spelled out as the two letters
        // "ij" they would make every plain 'i' and 'j' of the key expand to "ij".
        private static readonly string[] CleanKeyTwoCharRows = new string[] {
            "aeæ", "AEÆ", "oeœ", "OEŒ", "ssß", "ij\u0133", "IJ\u0132", "ngŋ", "NGŊ" };

        /// <summary>
        /// Normalises a sort key: underscores become spaces, runs of spaces collapse to one,
        /// and accented letters, ligatures and non-ASCII digits are replaced by the
        /// replacement given in the tables above.
        /// </summary>
        /// <param name="key">Raw sort key (must not be null).</param>
        /// <returns>The cleaned key.</returns>
        private string CleanKey(string key)
        {
            key = Regex.Replace(key.Replace("_", " "), @" +", " ");

            foreach (string row in CleanKeySingleCharRows)
            {
                key = CleanKeyApplyRow(key, row, 1);
            }
            foreach (string row in CleanKeyTwoCharRows)
            {
                key = CleanKeyApplyRow(key, row, 2);
            }

            return key;
        }

        // Replaces every character of row (past the first prefixLength ones) found in text
        // with the row's prefix. Plain ordinal replacement: no regex character class is built,
        // so no character of the table can be misread as regex syntax.
        private static string CleanKeyApplyRow(string text, string row, int prefixLength)
        {
            string replacement = row.Substring(0, prefixLength);
            for (int i = prefixLength; i < row.Length; i++)
            {
                text = text.Replace(row[i].ToString(), replacement);
            }
            return text;
        }
        /// <summary>
        /// Match evaluator for an existing DEFAULTSORT: rebuilds the "DEFAULTSORT:key" text
        /// with the key (group 1) passed through CleanKey.
        /// </summary>
        /// <param name="m">Match whose group 1 holds the current sort key.</param>
        /// <returns>"DEFAULTSORT:" followed by the cleaned key.</returns>
        private string Key(Match m)
        {
            string cleaned = CleanKey(m.Groups[1].Value);
            return string.Concat("DEFAULTSORT:", cleaned);
        }
 
        /// <summary>
        /// Match evaluator for numbers written with a decimal comma: removes the grouping
        /// separators (dot, blanks, &amp;nbsp; / &amp;thinsp; entities), turns the comma into a
        /// decimal point and wraps the result in {{formatnum:}}.
        /// </summary>
        /// <param name="m">Match whose value is the number as written in the article.</param>
        /// <returns>"{{formatnum:" + normalised number + "}}".</returns>
        private string formatnum2(Match m)
        {
            // Grouping separators first, so that the only remaining punctuation is the decimal comma.
            string ungrouped = Regex.Replace(m.Value, @"[\.  ]|&(?:nb|thin)sp;", "");
            string withDecimalPoint = ungrouped.Replace(",", ".");
            return string.Concat("{{formatnum:", withDecimalPoint, "}}");
        }
        /// <summary>
        /// Match evaluator for numbers written with a decimal point: removes the grouping
        /// separators (comma, blanks, &amp;nbsp; / &amp;thinsp; entities) and wraps the result in
        /// {{formatnum:}}.
        /// </summary>
        /// <param name="m">Match whose value is the number as written in the article.</param>
        /// <returns>"{{formatnum:" + number without grouping separators + "}}".</returns>
        private string formatnum1(Match m)
        {
            string ungrouped = Regex.Replace(m.Value, @"[,  ]|&(?:nb|thin)sp;", "");
            return string.Concat("{{formatnum:", ungrouped, "}}");
        }
 
// à copier/coller dans le module jusqu'ici
    }
}