# -*- coding: iso-8859-1 -*-
import urllib
import sgmllib
class ParseOldURL(sgmllib.SGMLParser):
    """Fetch a page and extract the replacement link it advertises.

    The page at ``url_l`` is downloaded as soon as the object is built and
    is fed to the SGML parser.  The parser keeps the ``href`` of the first
    ``<a>`` tag met inside a ``<div>`` whose ``id`` contains ``"oldURL"``.
    When no such link exists, the URL reported by the opener after the
    download (``geturl()``) is used instead.

    Attributes:
        newURL: ``href`` of the link found inside the "oldURL" div, or ''.
        redirectURL: URL reported by the opener for the downloaded page.
        oldURL: ``<div>`` nesting depth counted from the "oldURL" div
            (0 while outside of it).
        trouve: 1 once the link has been found, 0 otherwise.
    """

    def parse(self, s):
        """Feed the whole string ``s`` to the parser and flush it."""
        self.feed(s)
        self.close()

    def __init__(self, url_l, print_s=0, verbose=0):
        """Download ``url_l`` and parse it.

        Args:
            url_l: address of the page to fetch.
            print_s: when equal to 1, the downloaded page is printed.
            verbose: passed through to ``sgmllib.SGMLParser``.
        """
        sgmllib.SGMLParser.__init__(self, verbose)
        self.newURL = ''
        self.redirectURL = ''
        self.oldURL = 0
        self.trouve = 0
        opener = urllib.FancyURLopener({})
        urlopener = opener.open(url_l)
        try:
            s = urlopener.read()
            self.redirectURL = self.redirectURL + urlopener.geturl()
        finally:
            # Release the connection even when reading fails.
            urlopener.close()
        if print_s == 1:
            print(s)
        self.parse(s)

    def start_a(self, attributes):
        """Record the ``href`` of the first link inside the "oldURL" div."""
        if self.trouve != 0 or self.oldURL <= 0:
            return
        for name, value in attributes:
            if name == "href":
                self.newURL = self.newURL + value
                self.trouve = 1

    def start_div(self, attributes):
        """Track ``<div>`` nesting, starting at the div marked "oldURL"."""
        if self.trouve != 0:
            return
        if self.oldURL == 0:
            # Still outside: only a div whose id mentions "oldURL" opens
            # the region of interest.
            for name, value in attributes:
                if name == "id" and value.find("oldURL") != -1:
                    self.oldURL = 1
        else:
            # Already inside: count nested divs so that end_div() can tell
            # when the region is left.
            self.oldURL = self.oldURL + 1

    def end_div(self):
        """Leave one ``<div>`` level of the "oldURL" region."""
        if self.trouve == 0 and self.oldURL > 0:
            self.oldURL = self.oldURL - 1

    def get_newURL(self):
        """Return the link found in the page, else the fetched page's URL."""
        if self.trouve == 0:
            return self.redirectURL
        return self.newURL

    def get_param(self, param):
        """Return the value of ``param`` in the query string of ``newURL``.

        Returns '' when no link was found or when ``param`` is absent.
        """
        if self.trouve == 0:
            return ''
        marker = param + '='
        # NOTE(review): this is a plain substring search, so 'idTexte='
        # also matches inside 'cidTexte='.  Kept as is because callers may
        # rely on it -- confirm before tightening.
        start = self.newURL.find(marker)
        if start == -1:
            return ''
        start = start + len(marker)
        end = self.newURL.find('&', start)
        if end == -1:
            end = len(self.newURL)
        return self.newURL[start:end]

    def get_idTexte(self):
        """Return the ``idTexte`` parameter of the new URL ('' if none)."""
        return self.get_param('idTexte')

    def get_dateTexte(self):
        """Return the ``dateTexte`` parameter of the new URL ('' if none)."""
        return self.get_param('dateTexte')
class ModeleLegifrance:
    """Parsed form of a ``{{Légifrance|base|numéro|texte}}`` template call.

    The constructor extracts the template name and its first three
    parameters; ``doOldURL()`` then builds the old-style
    ``www.legifrance.gouv.fr`` address matching them.

    Attributes:
        base_needTranslate: names of the bases that are not legal codes.
        Code: maps the code abbreviations accepted as base to the
            identifier placed in the URL.
        Modele: template name, starting at "Légifrance" ('' if absent).
        Base: first parameter (base or code abbreviation).
        Numero: second parameter (document or article number).
        Texte: third parameter (display text).
        listParametres: raw pieces obtained by splitting the template
            on "|".
        oldURL: last URL built by ``doOldURL()`` ('' before the first call).
    """

    # Common prefix of every generated URL.
    _SITE = "http://www.legifrance.gouv.fr/"

    # Bases served by WAspad/UnDocument, mapped to the value sent as
    # "base=" ("avant90" is an alias of "LEX_SIMPLE_AV90").
    _DOCUMENT_BASES = {
        "CASS": "CASS",
        "INCA": "INCA",
        "JADE": "JADE",
        "CONSTIT": "CONSTIT",
        "LEGI": "LEGI",
        "LEX": "LEX",
        "LEX_SIMPLE_AV90": "LEX_SIMPLE_AV90",
        "avant90": "LEX_SIMPLE_AV90",
    }

    # Bases whose number is appended directly under "texteconsolide/".
    _CONSOLIDATED_BASES = ("consolidé", "texteconsolide", "consolide")

    def __init__(self, ml):
        """Parse the template call contained in the string ``ml``.

        Text before "{{" and from the first following "}}" onwards is
        ignored.  At most three parameters are read, by position; a
        leading ``name=`` is dropped and surrounding whitespace is
        stripped from each value.
        """
        self.base_needTranslate = [
            "CASS",
            "INCA",
            "JADE",
            "CONSTIT",
            "LEGI",
            "LEX",
            "LEX_SIMPLE_AV90",
            "avant90",
            "consolidé",
            "texteconsolide",
            "consolide",
            "JORF",
        ]
        self.Code = {
            "CC": "CCIVILL0",
            "CCIVILL0": "CCIVILL0",
            "CCOM": "CCOMMERL",
            "COM": "CCOMMERL",
            "CCOM(R)": "CCOMMERM",
            "COM(R)": "CCOMMERM",
            "CGCT": "CGCTERRL",
            "CGCT(R)": "CGCTERRM",
            "CEDU": "CEDUCATL",
            "CEDU(R)": "CEDUCATM",
            "CELE": "CELECTOL",
            "CELE(R)": "CELECTOM",
            "CESEDA(L)": "CENTGERL",
            "CESEDA(R)": "CENTGERM",
            "CE": "CENVIROL",
            "CE(R)": "CENVIROM",
            "CJA": "CJUSADML",
            "CJA(R)": "CJUSADMR",
            "CJF(L)": "CJURFINL",
            "CJF(R)": "CJURFINR",
            "COJ(L)": "CORGJUDL",
            "COJ(R)": "CORGJUDR",
            "COJ": "CORGJUNL",
            "CPAT": "CPATRIML",
            "CP": "CPENALLL",
            "CP(R)": "CPENALLR",
            "CPC": "CPROCIA0",
            "NCPC": "CPROCIV0",
            "CPROCIV0": "CPROCIV0",
            "CPP": "CPROCPEL",
            "CPP(R)": "CPROCPER",
            "CPP(D)": "CPROCPED",
            "CPP(A)": "CPROCPEA",
            "CGPPP": "CGPROPPL",
            "CPI": "CPROINTL",
            "CESEDA": "CENTGERL",
            "CRO": "CROUTENL",
            "CRO(R)": "CROUTENM",
            "CR": "CRURALNL",
            "CR(R)": "CRURALNM",
            "CSP": "CSANPUNL",
            "CSP(NR)": "CSANPUNR",
            "CSP(L)": "CSANPUBL",
            "CSP(R)": "CSANPUBR",
            "CSS(L)": "CSECSOCL",
            "CSS(D)": "CSECSOCD",
            "CSS(R)": "CSECSOCR",
            "CT(NL)": "CTRAVANL",
            "CT": "CTRAVAIL",
            "CT(R)": "CTRAVAIR",
            "CT(D)": "CTRAVAID",
            "CONSO": "CCONSOML",
            "CONSO(R)": "CCONSOMR",
            "CONSO(D)": "CCONSOMD",
            "URBA(L)": "CURBANIL",
            "URBA(R)": "CURBANIR",
            "CGI": "CGIMPO00",
            "CGLIVP": "CGLIVPFL",
            "CGLIVPFM": "CGLIVPFM",
            "CGLIVPFA": "CGLIVPFA",
            "ASS": "CASSURAL",
            "ASS(R)": "CASSURAM",
            "ASS(A)": "CASSURAA",
            "CDEF": "CDAFENSL",
            "CDEF(R)": "CDAFENSM",
        }
        self.Modele = ""
        self.Base = ""
        self.Numero = ""
        self.Texte = ""
        self.oldURL = ""

        # Keep only what lies between "{{" and the first "}}" after it.
        debut = ml.find("{{")
        if debut != -1:
            ml = ml[debut:]
        fin = ml.find("}}")
        if fin != -1:
            ml = ml[:fin]

        # maxsplit=3: any further "|" stays inside the last piece.
        self.listParametres = ml.split("|", 3)
        champs = self.listParametres

        position = champs[0].find("Légifrance")
        if position != -1:
            self.Modele = champs[0][position:].strip()
        if len(champs) > 1:
            self.Base = self._valeur(champs[1])
        if len(champs) > 2:
            self.Numero = self._valeur(champs[2])
        if len(champs) > 3:
            self.Texte = self._valeur(champs[3])

    @staticmethod
    def _valeur(parametre):
        """Return a parameter's value: text after the first "=", stripped."""
        return parametre.split("=", 1)[-1].strip()

    def Debug(self):
        """Print the four parsed fields, one per line."""
        print("Modele = " + self.Modele)
        print("Base = " + self.Base)
        print("Numero = " + self.Numero)
        print("Texte = " + self.Texte)

    def needTranslate(self):
        """Tell whether the template must be translated (always True).

        ``base_needTranslate`` is not consulted here: every template is
        translated, whatever its base.
        """
        return True

    def doOldURL(self):
        """Build, store in ``self.oldURL`` and return the old-style URL.

        The path depends on ``self.Base``: a document base, a consolidated
        text, the "JORF" base, or otherwise a code abbreviation looked up
        in ``self.Code``.  ``self.Numero`` is appended in every case.

        Raises:
            KeyError: ``self.Base`` is neither a known base nor a known
                code abbreviation; ``self.oldURL`` is left unchanged.
        """
        if not self.needTranslate():
            return self.oldURL

        base = self.Base
        if base in self._DOCUMENT_BASES:
            chemin = ("WAspad/UnDocument?base="
                      + self._DOCUMENT_BASES[base] + "&nod=")
        elif base in self._CONSOLIDATED_BASES:
            chemin = "texteconsolide/"
        elif base == "JORF":
            chemin = "WAspad/UnTexteDeJorf?numjo="
        else:
            try:
                code = self.Code[base]
            except KeyError:
                raise KeyError(
                    "unknown Legifrance base or code: %r" % (base,))
            chemin = "WAspad/UnArticleDeCode?code=" + code + ".rcv&art="

        self.oldURL = self._SITE + chemin + self.Numero
        return self.oldURL
# Sample template calls exercised when the file is run.
a1 = "{{Légifrance|Base=CCIVILL0|numéro=1116|Texte=Le Texte fondateur du dol en droit civil}}"
a2 = "{{Légifrance|base=INCA|numéro=IXRXCX2005X12X06X00813X012|texte=Arrêt de la Cour de Cassation}}"
a3 = "{{Légifrance|base=consolidé|numéro=PPEDY.htm}}"
a4 = "{{Légifrance|base=avant90|numéro=1LX978742|texte=Loi}}"

# All four templates are parsed before any page is fetched.
m1 = ModeleLegifrance(a1)
m2 = ModeleLegifrance(a2)
m3 = ModeleLegifrance(a3)
m4 = ModeleLegifrance(a4)

# For each sample: show the parsed fields, build the old URL, fetch it and
# show the address it leads to.  Samples are separated by an empty line.
for _rang, _modele in enumerate((m1, m2, m3, m4)):
    if _rang > 0:
        print("")
    _modele.Debug()
    if _modele.needTranslate():
        s = _modele.doOldURL()
        if _rang == 0:
            # Only the first sample also dumps the downloaded page.
            P = ParseOldURL(s, 1)
        else:
            P = ParseOldURL(s)
        print("oldURL = " + s)
        print("newURL = " + P.get_newURL())