Utilisateur:ILJR/bac a sable/traitements Légifrance

Un article de Wikipédia, l'encyclopédie libre.

# -*- coding: utf-8 -*-
import urllib
import sgmllib
 
class ParseOldURL(sgmllib.SGMLParser):
    """Download a page and extract the link announced in its "oldURL" block.

    The constructor fetches 'url_l' and parses the returned document.  While
    parsing, the object looks for a <div> whose id contains "oldURL" and
    records the href of the first <a> met inside it.

    Attributes:
        newURL:      href collected inside the "oldURL" div ('' if none).
        redirectURL: URL reported by the opener after the download.
        oldURL:      current <div> nesting depth inside the "oldURL" block
                     (0 while outside of it).
        trouve:      1 once a link has been collected, 0 otherwise.
    """

    def parse(self, s):
        """Feed the whole document 's' to the parser, then flush it."""
        self.feed(s)
        self.close()

    def __init__(self, url_l, print_s=0, verbose=0):
        """Fetch 'url_l' and parse it immediately.

        print_s -- when 1, the downloaded document is printed before parsing.
        verbose -- passed unchanged to the SGMLParser constructor.
        """
        sgmllib.SGMLParser.__init__(self, verbose)
        self.newURL = ''
        self.redirectURL = ''
        self.oldURL = 0
        self.trouve = 0

        opener = urllib.FancyURLopener({})
        urlopener = opener.open(url_l)
        try:
            s = urlopener.read()
            self.redirectURL = self.redirectURL + urlopener.geturl()
        finally:
            # Release the connection even when the download fails half-way.
            urlopener.close()

        if print_s == 1:
            # Single-argument call form: same output as the print statement.
            print(s)
        self.parse(s)

    def start_a(self, attributes):
        """Record the href of the first <a> seen inside the "oldURL" div."""
        if self.trouve != 0 or self.oldURL <= 0:
            return
        for name, value in attributes:
            if name == "href":
                self.newURL = self.newURL + value
                self.trouve = 1

    def start_div(self, attributes):
        """Enter the "oldURL" div, or count a <div> nested inside it."""
        if self.trouve != 0:
            return
        if self.oldURL == 0:
            for name, value in attributes:
                if name == "id" and value.find("oldURL") != -1:
                    self.oldURL = 1
        else:
            self.oldURL = self.oldURL + 1

    def end_div(self):
        """Leave one <div> level of the "oldURL" block."""
        if self.trouve == 0 and self.oldURL > 0:
            self.oldURL = self.oldURL - 1

    def get_newURL(self):
        """Return the collected link, or the fetched URL when none was found."""
        if self.trouve == 0:
            return self.redirectURL
        return self.newURL

    def get_param(self, param):
        """Return the value of query parameter 'param' in the collected link.

        Returns '' when no link was collected or when the parameter is absent
        (the latter case used to fall through and return None).
        """
        if self.trouve == 0:
            return ''
        cle = param + '='
        # NOTE(review): plain substring search, so 'idTexte' is also found
        # inside a longer name such as 'cidTexte' -- confirm this is wanted.
        debut = self.newURL.find(cle)
        if debut == -1:
            return ''
        debut = debut + len(cle)
        fin = self.newURL.find('&', debut)
        if fin == -1:
            fin = len(self.newURL)
        return self.newURL[debut:fin]

    def get_idTexte(self):
        """Return the 'idTexte' parameter of the collected link."""
        return self.get_param('idTexte')

    def get_dateTexte(self):
        """Return the 'dateTexte' parameter of the collected link."""
        return self.get_param('dateTexte')
 
 
class ModeleLegifrance:
    """Parse one call of the Légifrance template and build its legacy URL.

    The constructor receives wikitext containing a template call such as
    ``{{Légifrance|base|numéro|texte}}`` (parameters may be written
    ``name=value``; they are always read by position) and exposes:

        Modele          u"Légifrance" when the call was recognised, else "".
        Base            first parameter, as written in the wikitext.
        Numero          second parameter, as written in the wikitext.
        Texte           third parameter (keeps any further "|" it contains).
        listParametres  raw split of the template call.
        Code            alias -> identifier used in the URLs built below.
        iCode           alias -> rank of the alias in the table below.
        oldURL          last URL built by doOldURL().
    """

    # Single source of truth for Code and iCode.  ORDER MATTERS: the rank of
    # an entry is its position in this table, and get_texte()/doOldURL()
    # branch on the rank thresholds declared right after it.
    _BASES = (
        (u"CASS", "CASS"),
        (u"INCA", "INCA"),
        (u"JADE", "JADE"),
        (u"CONSTIT", "CONSTIT"),
        (u"LEGI", "LEGI"),
        (u"LEX", "LEX"),
        (u"LEX_SIMPLE_AV90", "LEX_SIMPLE_AV90"),
        (u"avant90", "LEX_SIMPLE_AV90"),
        (u"consolidé", "texteconsolide"),
        (u"texteconsolide", "texteconsolide"),
        (u"consolide", "texteconsolide"),
        (u"JORF", "JORF"),
        (u"CC", "CCIVILL0"),
        (u"CCIVILL0", "CCIVILL0"),
        (u"CCOM", "CCOMMERL"),
        (u"COM", "CCOMMERL"),
        (u"CCOM(R)", "CCOMMERM"),
        (u"COM(R)", "CCOMMERM"),
        (u"CGCT", "CGCTERRL"),
        (u"CGCT(R)", "CGCTERRM"),
        (u"CEDU", "CEDUCATL"),
        (u"CEDU(R)", "CEDUCATM"),
        (u"CELE", "CELECTOL"),
        (u"CELE(R)", "CELECTOM"),
        (u"CESEDA(L)", "CENTGERL"),
        (u"CESEDA(R)", "CENTGERM"),
        (u"CE", "CENVIROL"),
        (u"CE(R)", "CENVIROM"),
        (u"CJA", "CJUSADML"),
        (u"CJA(R)", "CJUSADMR"),
        (u"CJF(L)", "CJURFINL"),
        (u"CJF(R)", "CJURFINR"),
        (u"COJ(L)", "CORGJUDL"),
        (u"COJ(R)", "CORGJUDR"),
        (u"COJ", "CORGJUNL"),
        (u"CPAT", "CPATRIML"),
        (u"CP", "CPENALLL"),
        (u"CP(R)", "CPENALLR"),
        (u"CPC", "CPROCIA0"),
        (u"NCPC", "CPROCIV0"),
        (u"CPROCIV0", "CPROCIV0"),
        (u"CPP", "CPROCPEL"),
        (u"CPP(R)", "CPROCPER"),
        (u"CPP(D)", "CPROCPED"),
        (u"CPP(A)", "CPROCPEA"),
        (u"CGPPP", "CGPROPPL"),
        (u"CPI", "CPROINTL"),
        (u"CESEDA", "CENTGERL"),
        (u"CRO", "CROUTENL"),
        (u"CRO(R)", "CROUTENM"),
        (u"CR", "CRURALNL"),
        (u"CR(R)", "CRURALNM"),
        (u"CSP", "CSANPUNL"),
        (u"CSP(NR)", "CSANPUNR"),
        (u"CSP(L)", "CSANPUBL"),
        (u"CSP(R)", "CSANPUBR"),
        (u"CSS(L)", "CSECSOCL"),
        (u"CSS(D)", "CSECSOCD"),
        (u"CSS(R)", "CSECSOCR"),
        (u"CT(NL)", "CTRAVANL"),
        (u"CT", "CTRAVAIL"),
        (u"CT(R)", "CTRAVAIR"),
        (u"CT(D)", "CTRAVAID"),
        (u"CONSO", "CCONSOML"),
        (u"CONSO(R)", "CCONSOMR"),
        (u"CONSO(D)", "CCONSOMD"),
        (u"URBA(L)", "CURBANIL"),
        (u"URBA(R)", "CURBANIR"),
        (u"CGI", "CGIMPO00"),
        (u"CGLIVP", "CGLIVPFL"),
        (u"CGLIVPFM", "CGLIVPFM"),
        (u"CGLIVPFA", "CGLIVPFA"),
        (u"ASS", "CASSURAL"),
        (u"ASS(R)", "CASSURAM"),
        (u"ASS(A)", "CASSURAA"),
        (u"CDEF", "CDAFENSL"),
        (u"CDEF(R)", "CDAFENSM"),
    )

    # Rank thresholds into _BASES:
    #   ranks below 8   -> "UnDocument" URLs (CASS ... avant90)
    #   ranks 8 to 10   -> "texteconsolide/" URLs
    #   rank 11         -> JORF
    #   ranks from 12   -> "UnArticleDeCode" URLs
    _RANG_CONSOLIDE = 8
    _RANG_JORF = 11
    _RANG_CODES = 12

    def __init__(self, ml):
        """Parse the first template call found in the wikitext 'ml'.

        Everything before the first "{{" and from the first following "}}"
        onwards is ignored.  The attributes stay empty when the template
        name contains neither u"Légifrance" nor u"légifrance".
        """
        # Each instance gets its own dictionaries, as before.
        self.Code = dict(self._BASES)
        self.iCode = dict(
            (alias, rang) for rang, (alias, _ident) in enumerate(self._BASES))

        self.Modele = ""
        self.Base = ""
        self.Numero = ""
        self.Texte = ""
        self.oldURL = ""

        debut = ml.find("{{")
        if debut != -1:
            ml = ml[debut:]
        fin = ml.find("}}")
        if fin != -1:
            ml = ml[:fin]

        # At most three splits: the last parameter keeps any extra "|".
        self.listParametres = ml.split("|", 3)

        nom = self.listParametres[0]
        if nom.find(u"Légifrance") == -1 and nom.find(u"légifrance") == -1:
            return

        self.Modele = u"Légifrance"
        valeurs = [self._valeur(p) for p in self.listParametres[1:]]
        # Missing trailing parameters default to the empty string.
        valeurs = valeurs + [""] * (3 - len(valeurs))
        self.Base, self.Numero, self.Texte = valeurs

    def _valeur(self, parametre):
        """Return what follows the first "=" of 'parametre', or all of it."""
        return parametre.split("=", 1)[-1]

    def _cle_base(self):
        """Return Base without surrounding whitespace, for table lookups.

        Template calls are often written with spaces or line breaks around
        their parameters; without this the lookups below silently missed.
        """
        return self.Base.strip()

    def Debug(self):
        """Print the parsed fields (the base is shown after translation)."""
        print("Modele = " + self.Modele)
        print("Base   = " + self.get_newBase())
        print("Numero = " + self.Numero)
        print("Texte  = " + self.Texte)

    def get_newBase(self):
        """Return the identifier mapped to Base, or Base itself if unknown."""
        return self.Code.get(self._cle_base(), self.Base)

    def get_texte(self):
        """Return the display text of the template call.

        An explicit Texte wins.  Otherwise Numero is used for the bases
        ranked from _RANG_CODES onwards, and "" for every other base
        (unknown bases included).
        """
        if self.Texte != "":
            return self.Texte
        if self.get_iCode() < self._RANG_CODES:
            return ""
        return self.Numero

    def get_iCode(self):
        """Return the rank of Base in the table, or -1 when it is unknown."""
        return self.iCode.get(self._cle_base(), -1)

    def doOldURL(self):
        """Build, store in self.oldURL and return the legacy URL.

        For an unknown base only the site root is returned.
        """
        self.oldURL = "http://www.legifrance.gouv.fr/"
        rang = self.get_iCode()

        if rang >= 0:
            if rang < self._RANG_CONSOLIDE:
                chemin = ("WAspad/UnDocument?base=" + self.get_newBase()
                          + "&nod=")
            elif rang < self._RANG_JORF:
                chemin = "texteconsolide/"
            elif rang == self._RANG_JORF:
                chemin = "WAspad/UnTexteDeJorf?numjo="
            else:
                chemin = ("WAspad/UnArticleDeCode?code=" + self.get_newBase()
                          + ".rcv&art=")
            # Surrounding whitespace/newlines from the wikitext have no place
            # in a URL; the Numero attribute itself is left untouched.
            self.oldURL = self.oldURL + chemin + self.Numero.strip()

        return self.oldURL
 
# -*- coding: utf-8 -*-
import wikipedia
import ModeleLegifrance
 
def save_texte(nom_fichier, texte):
    """Write 'texte', encoded as UTF-8, to the file 'nom_fichier'.

    An existing file is overwritten.  The file is closed even when encoding
    or writing fails.
    """
    f = open(nom_fichier, "wt")
    try:
        f.write(texte.encode("utf-8"))
    finally:
        f.close()
 
def traite_modele(modele, old, test=0):
    """Rewrite every ``{{<modele>...}}`` call of 'old' with named parameters.

    Each complete call is replaced by
    ``{{Légifrance|base=...|numéro=...|texte=...}}`` using the fields parsed
    by ModeleLegifrance.  Text outside the calls is copied unchanged, and so
    is a trailing call that has no closing "}}".

    modele -- template name to look for, without the leading "{{".
    old    -- wikitext to process.
    test   -- when 1, print the base rank of every call that was parsed.

    Returns the rewritten wikitext.
    """
    new = ""
    balise = "{{" + modele

    while True:
        debut = old.find(balise)
        if debut == -1:
            # No further call: keep the remainder as it is.
            new = new + old
            break

        new = new + old[:debut]
        old = old[debut:]

        fin = old.find("}}")
        if fin == -1:
            # Unterminated call: nothing was parsed, so there is nothing to
            # rewrite (nor to print in test mode).
            new = new + old
            break

        a = ModeleLegifrance.ModeleLegifrance(old)
        if a.Modele == "":
            # Not recognised as a Légifrance call: leave it untouched rather
            # than emit a template with an empty name.
            new = new + old[:fin + 2]
        else:
            new = (new + "{{" + a.Modele + "|base=" + a.Base
                   + u"|numéro=" + a.Numero
                   + "|texte=" + a.get_texte() + "}}")
        old = old[fin + 2:]

        if test == 1:
            print(a.get_iCode())

    return new
 
def main(test=0):
    """Add the parameter names to the Légifrance template on listed pages.

    Page titles are read, one per line, from "test_Jbot_ML.lst".  Pages that
    do not exist, are redirects, or may not be edited by bots are skipped.

    test -- when 1, nothing is saved on the wiki: the original text, the
            intermediate text and (if it differs) the final text are written
            to "<title>.old", "<title>.new1" and "<title>.new" instead.
    """
    f = open("test_Jbot_ML.lst", "rt")
    try:
        listeArticles = f.readlines()
    finally:
        f.close()

    site = wikipedia.getSite()

    for ligne in listeArticles:
        # Drop only the line terminator: blindly cutting the last character
        # truncated the final title of a file without a trailing newline.
        nomDePageURL = ligne.rstrip("\r\n")
        if not nomDePageURL:
            continue  # blank line, no page to process

        pageL = wikipedia.Page(site, nomDePageURL)
        if (not pageL.exists() or pageL.isRedirectPage()
                or not pageL.botMayEdit()):
            continue

        print(nomDePageURL)
        old = pageL.get()
        if test == 1:
            save_texte(nomDePageURL + ".old", old)

        # First pass: calls written with a lower-case initial.
        new = traite_modele(u"légifrance", old)
        if test == 1:
            save_texte(nomDePageURL + ".new1", new)

        # Second pass: calls written with a capital initial.
        if len(new) == 0:
            new = traite_modele(u"Légifrance", old)
        else:
            new = traite_modele(u"Légifrance", new)

        if new != old:
            if test == 1:
                save_texte(nomDePageURL + ".new", new)
            else:
                pageL.put(new, u"Ajout des noms des paramètres du [[Modèle:Légifrance]]")
 
# Script entry point: run the bot only when this file is executed directly.
if __name__ == "__main__":
    try:
       main()
    finally:
       # Always called, even when main() raises.
       # NOTE(review): presumably lets the wikipedia framework release what
       # it holds for this process -- confirm against its documentation.
       wikipedia.stopme()