I needed a script to convert XML from one format to another.
For instance,
Change this:
<subSection name="tag1" title="tag 1 title" >
</subSection>
To:
<subForm name="tag1Form" title="tag 1 title" >
<section name="tag1" >
</section>
</subForm>
I first tried doing this in bash, where sed was the obvious choice, but it didn't seem feasible because I also wanted to change the values of XML attributes.
Here is my attempt with sed; it is not finished:
# $1: input file
# Rename the opening and closing subSection tags to subForm.
# The first pattern is anchored on '<' so it neither inserts a second '<'
# nor rewrites the closing tag before the second pattern gets to see it.
# sed reads the file directly, which avoids the whitespace and backslash
# mangling of an unquoted `read`/`echo` loop.
sed -e 's/<subSection/<subForm/g' -e 's/<\/subSection/<\/subForm/g' "$1"
A quick-and-dirty solution seemed more feasible in Python:
#usage python convert_xml.py source_file.xml > converted_file.xml
import sys
import re
# Matches a name attribute together with its quoted value, e.g. name="tag1"
# or Name= 'tag_1'. Written as a raw string so the pattern contains no
# string-level escape sequences (the old '\-' was an invalid one).
formNameMatch = re.compile(r"""[nN]ame= *["'][a-zA-Z0-9_-]+["']""")
def ProcessXML(file):
    """Convert the XML file at path *file* line by line.

    Every line is passed to ProcessLine, which prints the converted text
    to stdout.
    """
    # ``with`` closes the handle even if ProcessLine raises; iterating the
    # file object streams lines instead of loading them all via readlines().
    with open(file, 'r') as f:
        for line in f:
            ProcessLine(line)
def _add_form_suffix(name_attr):
    """Return *name_attr* (e.g. ``name="tag1"``) with ``Form`` appended to its value."""
    # The attribute text ends with its closing quote (single or double), so
    # the suffix is inserted immediately before that quote.
    name_attr = re.sub(r'"$', 'Form"', name_attr)
    return re.sub(r"'$", "Form'", name_attr)


def ProcessLine(line):
    """Print the converted form of one line of the source XML.

    Matching is done per line:

    * ``<subSections>`` / ``</subSections>`` are renamed to ``subForms``.
    * ``<subSection name="x" ...>`` becomes ``<subForm name="xForm" ...>``
      followed by an opening ``<sections>`` and ``<section name="x" >``.
      If no name attribute is found, ``ERROR!!!`` is printed instead.
    * ``</subSection>`` first closes ``</section>`` and ``</sections>`` and
      then prints ``</subForm>``.
    * Lines mentioning ``multiSubSection`` are renamed to ``multiSubForm``
      and get ``Form`` appended to their name attribute when they have one.
    * Any other line is printed unchanged.

    ``print`` is always called with one parenthesised string, so the output
    is the same under Python 2 and Python 3.
    """
    # Lines read from a file still carry their newline and print adds its
    # own; strip it so the output does not gain a blank line per input line.
    line = line.rstrip('\r\n')

    if re.search(r'<subSection', line):
        if re.search(r'subSections', line):
            # Container tag: a plain rename, nothing is nested inside it here.
            print(re.sub(r'subSections', 'subForms', line))
            return
        line = re.sub(r'subSection', 'subForm', line)
        nameMatch = formNameMatch.search(line)
        if nameMatch is None:
            print("ERROR!!!")
            return
        name = nameMatch.group(0)
        print(formNameMatch.sub(_add_form_suffix(name), line))
        print(" <sections>")
        # The inner section keeps the original, un-suffixed name attribute.
        print(" <section " + name + " >")
    elif re.search(r'<\/subSection *>', line):
        print(" </section>")
        print(" </sections>")
        print(re.sub(r'subSection', 'subForm', line))
    elif re.search(r'<\/subSections>', line):
        print(re.sub(r'subSections', 'subForms', line))
    elif re.search(r'multiSubSection', line):
        line = re.sub(r'subSection', 'subForm', line)
        line = re.sub(r'SubSection', 'SubForm', line)
        nameMatch = formNameMatch.search(line)
        if nameMatch is not None:
            line = formNameMatch.sub(_add_form_suffix(nameMatch.group(0)), line)
        # Print even without a name attribute (e.g. the closing tag);
        # such lines used to be dropped from the output.
        print(line)
    else:
        print(line)
def main(*args):
    """Command-line entry point; *args* mirrors ``sys.argv``.

    Expects exactly two items: the program name and the path of the XML
    file to convert. The file is handed to ProcessXML; with any other
    argument count a usage message is written and nothing is converted.
    """
    if len(args) != 2:
        # The documented usage redirects stdout into the converted file, so
        # report the problem on stderr where the user will actually see it.
        sys.stderr.write("must supply file to process\n")
        return
    ProcessXML(args[1])


if __name__ == "__main__":
    main(*sys.argv)
No comments:
Post a Comment