In [1]:
import pandas as pd
In [2]:
# https://stackoverflow.com/questions/30791839/is-there-an-easy-way-to-group-columns-in-a-pandas-dataframe
In [3]:
from io import StringIO
import numpy as np
a = pd.read_csv(StringIO('T,Ax,Ay,Az,Bx,By,Bz,Cx,Cy,Cz,Dx,Dy,Dz\n\
    0,1,2,1,3,2,1,4,2,1,5,2,1\n\
    1,8,2,3,3,2,9,9,1,3,4,9,1\n\
    2,4,5,7,7,7,1,8,3,6,9,2,3'))
a.set_index('T', inplace=True)
In [4]:
a
Out[4]:
Ax Ay Az Bx By Bz Cx Cy Cz Dx Dy Dz
T
0 1 2 1 3 2 1 4 2 1 5 2 1
1 8 2 3 3 2 9 9 1 3 4 9 1
2 4 5 7 7 7 1 8 3 6 9 2 3
In [6]:
a.columns = pd.MultiIndex.from_tuples([(c[0], c[1]) for c in a.columns])
a
Out[6]:
A B C D
x y z x y z x y z x y z
T
0 1 2 1 3 2 1 4 2 1 5 2 1
1 8 2 3 3 2 9 9 1 3 4 9 1
2 4 5 7 7 7 1 8 3 6 9 2 3
In [ ]:
 
In [56]:
row = []
acol = ['AAA', 'BBB']
bcol = ['A', 'B', 'C', 'D']
ccol = ['x', 'y', 'z']
for i in range(10):
    col = []
    col.append(str(i))
    for a in acol:
        for b in bcol:
            for c in ccol:
                col.append(str(random.randint(1,10)))
    row.append(col)
In [60]:
df = pd.DataFrame(row)
df.set_index(0, inplace=True)
df
Out[60]:
1 2 3 4 5 6 7 8 9 10 ... 15 16 17 18 19 20 21 22 23 24
0
0 1 9 8 8 10 1 8 8 7 6 ... 2 4 2 9 9 4 4 8 10 2
1 2 2 2 8 2 3 9 1 5 10 ... 4 6 8 3 2 5 7 7 5 2
2 10 5 2 2 5 5 10 10 4 7 ... 2 3 10 2 6 5 6 1 5 3
3 1 2 5 1 4 3 1 7 8 7 ... 3 8 8 4 7 9 1 10 1 5
4 1 1 4 7 8 7 7 3 10 2 ... 10 6 1 3 10 10 1 3 10 1
5 4 6 1 10 3 3 5 10 3 9 ... 10 8 4 9 3 9 1 4 3 1
6 8 10 2 4 8 5 1 8 3 5 ... 3 6 9 5 3 1 3 3 8 8
7 7 5 7 6 2 8 4 9 6 7 ... 6 5 2 6 7 5 5 2 7 7
8 8 7 3 2 4 6 3 5 1 2 ... 4 4 6 7 3 10 3 3 3 1
9 3 3 3 8 9 9 1 9 3 4 ... 6 10 1 9 8 2 7 3 7 9

10 rows × 24 columns

In [61]:
df.columns = pd.MultiIndex.from_tuples([[a,b,c] for a in acol for b in bcol for c in ccol])
In [62]:
df
Out[62]:
AAA ... BBB
A B C D ... A B C D
x y z x y z x y z x ... z x y z x y z x y z
0
0 1 9 8 8 10 1 8 8 7 6 ... 2 4 2 9 9 4 4 8 10 2
1 2 2 2 8 2 3 9 1 5 10 ... 4 6 8 3 2 5 7 7 5 2
2 10 5 2 2 5 5 10 10 4 7 ... 2 3 10 2 6 5 6 1 5 3
3 1 2 5 1 4 3 1 7 8 7 ... 3 8 8 4 7 9 1 10 1 5
4 1 1 4 7 8 7 7 3 10 2 ... 10 6 1 3 10 10 1 3 10 1
5 4 6 1 10 3 3 5 10 3 9 ... 10 8 4 9 3 9 1 4 3 1
6 8 10 2 4 8 5 1 8 3 5 ... 3 6 9 5 3 1 3 3 8 8
7 7 5 7 6 2 8 4 9 6 7 ... 6 5 2 6 7 5 5 2 7 7
8 8 7 3 2 4 6 3 5 1 2 ... 4 4 6 7 3 10 3 3 3 1
9 3 3 3 8 9 9 1 9 3 4 ... 6 10 1 9 8 2 7 3 7 9

10 rows × 24 columns

In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [42]:
# importing pandas as pd
import pandas as pd
  
# Creating the array
array =[[1, 2, 3], ['Sharon', 'Nick', 'Bailey'], 
          ['Doctor', 'Scientist', 'Physicist']]
  
# Print the array
print(array)
[[1, 2, 3], ['Sharon', 'Nick', 'Bailey'], ['Doctor', 'Scientist', 'Physicist']]
In [43]:
midx = pd.MultiIndex.from_arrays(array, 
   names =('Ranking', 'Names', 'Profession'))
  
# Print the MultiIndex
print(midx)
MultiIndex(levels=[[1, 2, 3], ['Bailey', 'Nick', 'Sharon'], ['Doctor', 'Physicist', 'Scientist']],
           codes=[[0, 1, 2], [2, 1, 0], [0, 2, 1]],
           names=['Ranking', 'Names', 'Profession'])
In [ ]:
 
In [ ]:
 
In [40]:
import random
import pandas as pd
row = []
cols0 = ['Source1', 'Source2']
cols1 = ['status', 'op_status', 'env']
for a in range(10):
    vals = {}
    for b in cols0:
        for c in cols1:
            vals[b] = [*vals.get(b, []), str(random.randint(10, 20))]
    row.append([str(a), *vals['Source1'], *vals['Source2']])
df = pd.DataFrame(row)
df.columns = ['IP', *[f"{a} {b}" for a in cols0 for b in cols1]]
df = df.set_index('IP')
df.head(3)
Out[40]:
Source1 status Source1 op_status Source1 env Source2 status Source2 op_status Source2 env
IP
0 14 16 12 12 12 15
1 18 17 15 10 11 16
2 14 10 17 16 12 20
In [41]:
df.columns = pd.MultiIndex.from_tuples([c.split(" ") for c in df.columns])
df
Out[41]:
Source1 Source2
status op_status env status op_status env
IP
0 14 16 12 12 12 15
1 18 17 15 10 11 16
2 14 10 17 16 12 20
3 11 12 19 12 13 16
4 11 12 19 14 17 19
5 12 12 14 19 18 13
6 19 13 18 19 17 13
7 13 20 16 13 19 16
8 17 19 19 15 11 14
9 11 12 10 20 16 12
In [38]:
df.columns
Out[38]:
MultiIndex(levels=[['Source1', 'Source2'], ['env', 'op_status', 'status']],
           codes=[[0, 0, 0, 1, 1, 1], [2, 1, 0, 2, 1, 0]])
In [ ]:
 
In [45]:
import random
import pandas as pd
row = []
for a in range(10):
    for b in ['status', 'op_status', 'env']:
        row.append([str(a), b, random.randint(1,10), random.randint(10, 20)])
In [46]:
df = pd.DataFrame(row)
df.columns = ['IP', 'Attr', 'Source1', 'Source2']
df.head(10)
Out[46]:
IP Attr Source1 Source2
0 0 status 6 19
1 0 op_status 3 15
2 0 env 5 17
3 1 status 1 10
4 1 op_status 8 14
5 1 env 7 19
6 2 status 4 18
7 2 op_status 9 19
8 2 env 4 15
9 3 status 3 10
In [47]:
df.groupby(['IP', 'Attr']).first().to_excel("a.xlsx")
In [ ]:
df.groupby(['IP', 'Attr']).first().to_html("a.xlsx")