In [38]:
import requests
import xml.etree.ElementTree as ET 
import copy
import io
import pandas as pd
In [ ]:
# Download the XML document as text.
# `timeout` keeps a stalled server from hanging the kernel indefinitely, and
# `raise_for_status` turns an HTTP error response into an exception instead of
# letting the error page be stored as if it were the XML payload.
# NOTE(review): '<xml link>' looks like a redacted placeholder - supply the real URL.
response = requests.get('<xml link>', timeout=30)
response.raise_for_status()
data = response.text
In [31]:
# Parse the same file twice so the two trees are fully independent objects:
# `root` is the tree the cells are read from, `root2` is the tree that is
# trimmed, refilled and finally written out.
# (Alternative kept for reference: build both roots from the downloaded text
# with ET.fromstring(data) instead of reading the local file.)
filename = "dc.xml"
tree, tree2 = ET.parse(filename), ET.parse(filename)
root, root2 = tree.getroot(), tree2.getroot()
In [32]:
# Locate the cell container of the output tree and strip it down to its first
# three children; every child after those is removed.
r2r = root2.find('./diagram/mxGraphModel/root')
for extra in list(r2r)[3:]:  # slice a snapshot so removal is safe while looping
    r2r.remove(extra)
In [33]:
# Show the ids of the children that are still in the output container.
for kept in r2r:
    kept_id = kept.attrib['id']
    print(kept_id)
0
1
1eOIn4WZ26UqJBf8JEfd-2
In [34]:
# Lookup table: cell `value` (as a string) -> how many copies of that cell the
# rebuild loop generates.
rep = {
    '0': 6, '1': 3, '2': 3, '3': 7, '4': 6,
    '5': 7, '6': 7, '7': 3, '8': 5, '9': 8,
    '10': 6, '11': 7, '12': 8, '13': 7, '14': 7,
}
In [35]:
# Refill the output container `r2r` from the cells of the source tree `root`.
#
# * The first three source children are skipped (r2r already holds its own
#   first three children).
# * A cell whose `value` is a key of `rep` is used as a template: it gets the
#   id "text_<value>" or "bar_<value>" (depending on whether its style contains
#   "text"), its value is blanked, and rep[value] deep copies are inserted,
#   each with the copy index appended to the id and its mxGeometry `y` shifted
#   by `y_step` per copy.
# * Any other cell is inserted once, with its id set to its value (values
#   starting with "b2" are collapsed to "b2") and its value blanked.
# * A cell with no `value` attribute is inserted unchanged.
#
# Note: this mutates the elements of `root` in place (id/value), so the cell is
# not idempotent - re-parse the file before running it again.
y_step = 33  # vertical offset between successive copies of a template cell

for idx, item in enumerate(root.find('diagram/mxGraphModel/root')):
    if idx < 3:
        continue

    rval = item.attrib.get('value')
    if rval is None:
        # Nothing to derive an id from. The old `-1` default made the slice
        # below raise TypeError; carry the cell over untouched instead.
        r2r.insert(idx, item)
        continue

    if rval[:2] == 'b2':
        rval = 'b2'
    print(rval)

    if rval in rep:
        print('rval', rval)
        genid = item.attrib['value']
        # A missing style attribute is treated as "not text" rather than
        # raising KeyError.
        if 'text' in item.attrib.get('style', ''):
            genid = f'text_{genid}'
        else:
            genid = f'bar_{genid}'
        item.attrib['id'] = genid
        item.attrib['value'] = ''
        # Only `y` is needed; the former unused int(mx['x']) parse could fail
        # on a missing or non-integer x without contributing anything.
        mx_y = float(item.find('./mxGeometry').attrib['y'])
        for i in range(rep[rval]):
            item2 = copy.deepcopy(item)
            item2.attrib['id'] += str(i)
            item2.find('./mxGeometry').attrib.update({
                'y': str(mx_y + i * y_step)
            })
            # Every copy is inserted at the same index, so a later copy ends
            # up in front of the earlier ones.
            r2r.insert(idx, item2)
    else:
        item.attrib['id'] = rval
        item.attrib['value'] = ''
        r2r.insert(idx, item)
7
rval 7
10
rval 10
12
rval 12
7
rval 7
10
rval 10
12
rval 12
8
rval 8
8
rval 8
1
rval 1
1
rval 1
2
rval 2
2
rval 2
4
rval 4
4
rval 4
0
rval 0
0
rval 0
3
rval 3
3
rval 3
5
rval 5
5
rval 5
9
rval 9
9
rval 9
11
rval 11
11
rval 11
13
rval 13
13
rval 13
b1
b2
b3
b4
b5
b6
b7
bata
batb
batc
mainbatdc
mainbatv
mainbatsc
6
rval 6
6
rval 6
14
rval 14
14
rval 14
bata_label
batb_label
batc_label
mainbatd_label
mainbatsc_label
In [36]:
# Sanity check: collect every id in the output container and display the rows
# whose id already appeared earlier (an empty frame means all ids are unique).
ids = [node.attrib['id'] for node in r2r]
df = pd.DataFrame(ids)
is_repeat = df.duplicated()
df.loc[is_repeat]
Out[36]:
0
In [37]:
# Serialise the rebuilt tree to test.xml.
# The file is opened in a `with` block so the handle is flushed and closed
# (the bare open() call previously leaked it). encoding='unicode' makes
# ElementTree emit str without an XML declaration, so the text file is opened
# as UTF-8 explicitly - the encoding an XML parser assumes when no declaration
# is present - instead of relying on the platform default.
tree = ET.ElementTree(root2)
with open('test.xml', 'w', encoding='utf-8') as out_file:
    tree.write(out_file, encoding='unicode')
In [ ]: