Utilisateur:ILJR/bac a sable/getNewURL

Un article de Wikipédia, l'encyclopédie libre.

# -*- coding: iso-8859-1 -*-
import urllib
import sgmllib
 
class ParseOldURL(sgmllib.SGMLParser):
    """Fetch a URL and extract the replacement link advertised in its page.

    The constructor downloads ``url_l``, remembers the address the request
    finally landed on, and feeds the body to the SGML parser.  While parsing,
    the instance looks for a ``<div>`` whose ``id`` contains ``"oldURL"`` and
    keeps the ``href`` of the first ``<a>`` met inside that div (nested divs
    included).

    Attributes:
        newURL: ``href`` of the link found inside the "oldURL" div, or ``''``.
        redirectURL: URL reported by the opened resource (``geturl()``).
        oldURL: div nesting depth; 0 means "outside the oldURL div".
        trouve: 1 once a link has been captured, else 0.
    """

    def parse(self, s):
        """Parse the given string ``s`` and flush the parser."""
        self.feed(s)
        self.close()

    def __init__(self, url_l, print_s=0, verbose=0):
        """Download ``url_l`` and parse it immediately.

        Args:
            url_l: address to fetch.
            print_s: when equal to 1, the downloaded body is printed.
            verbose: passed through to ``sgmllib.SGMLParser``.
        """
        sgmllib.SGMLParser.__init__(self, verbose)
        self.newURL = ''
        self.redirectURL = ''
        self.oldURL = 0
        self.trouve = 0

        opener = urllib.FancyURLopener({})
        urlopener = opener.open(url_l)
        try:
            s = urlopener.read()
            self.redirectURL = urlopener.geturl()
        finally:
            # Release the connection even if reading fails.
            urlopener.close()

        if print_s == 1:
            # Single-argument parenthesised form prints the same text as
            # the bare print statement.
            print(s)
        self.parse(s)

    def start_a(self, attributes):
        """Capture the ``href`` of the first ``<a>`` inside the oldURL div."""
        if self.trouve != 0 or self.oldURL <= 0:
            return
        for name, value in attributes:
            if name == "href":
                self.newURL = self.newURL + value
                self.trouve = 1
                # Only the first href counts; a duplicated attribute must
                # not be glued onto the URL.
                break

    def start_div(self, attributes):
        """Enter the oldURL div, or track one more nesting level inside it."""
        if self.trouve != 0:
            return
        if self.oldURL == 0:
            for name, value in attributes:
                if name == "id" and "oldURL" in value:
                    self.oldURL = 1
        else:
            self.oldURL = self.oldURL + 1

    def end_div(self):
        """Leave one div level while inside the oldURL div."""
        if self.trouve == 0 and self.oldURL > 0:
            self.oldURL = self.oldURL - 1

    def get_newURL(self):
        """Return the captured link, or the fetched URL if none was found."""
        if self.trouve == 0:
            return self.redirectURL
        return self.newURL

    def get_param(self, param):
        """Return the value of ``param`` in the captured link's query string.

        The lookup is a plain substring search for ``param + '='``, so
        ``'idTexte'`` also matches inside ``'cidTexte='``.  That behaviour is
        kept as is because callers may rely on it.

        Returns:
            The text between ``param=`` and the next ``&`` (or the end of the
            URL); ``''`` when no link was captured or the parameter is absent.
        """
        if self.trouve == 0:
            return ''
        marker = param + '='
        marker_pos = self.newURL.find(marker)
        if marker_pos == -1:
            # Previously fell through and returned None here.
            return ''
        value_start = marker_pos + len(marker)
        value_end = self.newURL.find('&', value_start)
        if value_end == -1:
            value_end = len(self.newURL)
        return self.newURL[value_start:value_end]

    def get_idTexte(self):
        """Return the ``idTexte`` parameter of the captured link."""
        return self.get_param('idTexte')

    def get_dateTexte(self):
        """Return the ``dateTexte`` parameter of the captured link."""
        return self.get_param('dateTexte')
 
 
class ModeleLegifrance:
    """Parsed ``{{Légifrance|base|numéro|texte}}`` wiki template call.

    The constructor isolates the first ``{{ ... }}`` span of the given
    wikitext, splits it on ``|`` into at most four fields and stores them
    positionally (parameter names before ``=`` are dropped, not interpreted).

    Attributes:
        base_needTranslate: base names that doOldURL maps to fixed URL
            prefixes.
        Code: short code names mapped to the identifiers used in the
            ``UnArticleDeCode`` URL.
        Modele: template name starting at "Légifrance", or "" if not found.
        Base, Numero, Texte: values of the 1st, 2nd and 3rd parameters,
            stripped of surrounding whitespace; "" when absent.
        listParametres: the raw, unstripped fields produced by the split.
        oldURL: last URL built by doOldURL, "" until it has been called.
    """

    def __init__(self, ml):
        """Parse the template call contained in the wikitext ``ml``."""
        self.base_needTranslate = [
            "CASS",
            "INCA",
            "JADE",
            "CONSTIT",
            "LEGI",
            "LEX",
            "LEX_SIMPLE_AV90",
            "avant90",
            "consolidé",
            "texteconsolide",
            "consolide",
            "JORF"
        ]

        self.Code = {
            "CC" : "CCIVILL0",
            "CCIVILL0" : "CCIVILL0",
            "CCOM" : "CCOMMERL",
            "COM" : "CCOMMERL",
            "CCOM(R)" : "CCOMMERM",
            "COM(R)" : "CCOMMERM",
            "CGCT" : "CGCTERRL",
            "CGCT(R)" : "CGCTERRM",
            "CEDU" : "CEDUCATL",
            "CEDU(R)" : "CEDUCATM",
            "CELE" : "CELECTOL",
            "CELE(R)" : "CELECTOM",
            "CESEDA(L)" : "CENTGERL",
            "CESEDA(R)" : "CENTGERM",
            "CE" : "CENVIROL",
            "CE(R)" : "CENVIROM",
            "CJA" : "CJUSADML",
            "CJA(R)" : "CJUSADMR",
            "CJF(L)" : "CJURFINL",
            "CJF(R)" : "CJURFINR",
            "COJ(L)" : "CORGJUDL",
            "COJ(R)" : "CORGJUDR",
            "COJ" : "CORGJUNL",
            "CPAT" : "CPATRIML",
            "CP" : "CPENALLL",
            "CP(R)" : "CPENALLR",
            "CPC" : "CPROCIA0",
            "NCPC" : "CPROCIV0",
            "CPROCIV0" : "CPROCIV0",
            "CPP" : "CPROCPEL",
            "CPP(R)" : "CPROCPER",
            "CPP(D)" : "CPROCPED",
            "CPP(A)" : "CPROCPEA",
            "CGPPP" : "CGPROPPL",
            "CPI" : "CPROINTL",
            "CESEDA" : "CENTGERL",
            "CRO" : "CROUTENL",
            "CRO(R)" : "CROUTENM",
            "CR" : "CRURALNL",
            "CR(R)" : "CRURALNM",
            "CSP" : "CSANPUNL",
            "CSP(NR)" : "CSANPUNR",
            "CSP(L)" : "CSANPUBL",
            "CSP(R)" : "CSANPUBR",
            "CSS(L)" : "CSECSOCL",
            "CSS(D)" : "CSECSOCD",
            "CSS(R)" : "CSECSOCR",
            "CT(NL)" : "CTRAVANL",
            "CT" : "CTRAVAIL",
            "CT(R)" : "CTRAVAIR",
            "CT(D)" : "CTRAVAID",
            "CONSO" : "CCONSOML",
            "CONSO(R)" : "CCONSOMR",
            "CONSO(D)" : "CCONSOMD",
            "URBA(L)" : "CURBANIL",
            "URBA(R)" : "CURBANIR",
            "CGI" : "CGIMPO00",
            "CGLIVP" : "CGLIVPFL",
            "CGLIVPFM" : "CGLIVPFM",
            "CGLIVPFA" : "CGLIVPFA",
            "ASS" : "CASSURAL",
            "ASS(R)" : "CASSURAM",
            "ASS(A)" : "CASSURAA",
            "CDEF" : "CDAFENSL",
            "CDEF(R)" : "CDAFENSM"
        }

        self.Modele = ""
        self.Base = ""
        self.Numero = ""
        self.Texte = ""
        self.oldURL = ""

        # Keep only what lies between the first "{{" and the next "}}".
        debut = ml.find("{{")
        if debut != -1:
            ml = ml[debut:]
        fin = ml.find("}}")
        if fin != -1:
            ml = ml[:fin]

        # maxsplit=3: any further "|" stays inside the last field (Texte).
        self.listParametres = ml.split("|", 3)

        position = self.listParametres[0].find("Légifrance")
        if position == -1:
            # Not a Légifrance template: leave every field empty.
            return
        self.Modele = self.listParametres[0][position:].strip()

        valeurs = [self._valeur(champ) for champ in self.listParametres[1:]]
        if len(valeurs) > 0:
            self.Base = valeurs[0]
        if len(valeurs) > 1:
            self.Numero = valeurs[1]
        if len(valeurs) > 2:
            self.Texte = valeurs[2]

    def _valeur(self, champ):
        """Return the value part of one ``name=value`` (or bare) field.

        Surrounding whitespace is removed so that templates written as
        ``| base = INCA |`` or spread over several lines parse the same as
        the compact form.
        """
        egal = champ.find("=")
        if egal != -1:
            champ = champ[egal + 1:]
        return champ.strip()

    def Debug(self):
        """Print the parsed fields, one per line."""
        print("Modele = " + self.Modele)
        print("Base   = " + self.Base)
        print("Numero = " + self.Numero)
        print("Texte  = " + self.Texte)

    def needTranslate(self):
        """Tell whether an old-style URL should be built; always True."""
        # The membership test against base_needTranslate is disabled:
        # doOldURL also handles bases outside that list via the Code table.
        return True

    def doOldURL(self):
        """Build, store and return the old-style URL for this template.

        Returns:
            The URL, also stored in ``self.oldURL``.  When needTranslate()
            is false the current ``self.oldURL`` is returned unchanged.

        Raises:
            KeyError: ``Base`` is neither a known base nor a key of ``Code``.
        """
        if not self.needTranslate():
            return self.oldURL

        base = self.Base
        if base in ("CASS", "INCA", "JADE", "CONSTIT", "LEGI", "LEX",
                    "LEX_SIMPLE_AV90"):
            chemin = "WAspad/UnDocument?base=" + base + "&nod="
        elif base == "avant90":
            # "avant90" is served from the LEX_SIMPLE_AV90 base.
            chemin = "WAspad/UnDocument?base=" + "LEX_SIMPLE_AV90" + "&nod="
        elif base in ("consolidé", "texteconsolide", "consolide"):
            chemin = "texteconsolide/"
        elif base == "JORF":
            chemin = "WAspad/UnTexteDeJorf?numjo="
        else:
            # Anything else must be a code name listed in self.Code.
            if base not in self.Code:
                raise KeyError(
                    "unknown Legifrance base or code: %r" % (base,))
            chemin = ("WAspad/UnArticleDeCode?code=" + self.Code[base]
                      + ".rcv&art=")

        self.oldURL = "http://www.legifrance.gouv.fr/" + chemin + self.Numero
        return self.oldURL
 
# Sample template calls exercising the four URL families.
a1 = "{{Légifrance|Base=CCIVILL0|numéro=1116|Texte=Le Texte fondateur du dol en droit civil}}"
a2 = "{{Légifrance|base=INCA|numéro=IXRXCX2005X12X06X00813X012|texte=Arrêt de la Cour de Cassation}}"
a3 = "{{Légifrance|base=consolidé|numéro=PPEDY.htm}}"
a4 = "{{Légifrance|base=avant90|numéro=1LX978742|texte=Loi}}"

m1 = ModeleLegifrance(a1)
m2 = ModeleLegifrance(a2)
m3 = ModeleLegifrance(a3)
m4 = ModeleLegifrance(a4)

# For each sample: dump the parsed fields, build the old URL, fetch it and
# show the address it resolves to.  Only the first sample also prints the
# downloaded page (print_s=1); a blank line separates consecutive samples.
for _rang, (_modele, _afficher_page) in enumerate(
        ((m1, 1), (m2, 0), (m3, 0), (m4, 0))):
    if _rang > 0:
        print("")
    _modele.Debug()
    if _modele.needTranslate():
        s = _modele.doOldURL()
        P = ParseOldURL(s, _afficher_page)
        print("oldURL = " + s)
        print("newURL = " + P.get_newURL())