Python¥Ð¥¤¥ª?¡¡
7890¡¡¡¡¡¡2022-08-29 (·î) 15:35:43

¥ê¥¹¥È¤Î½ÅÊ£ºï½ü

Python¤Ç¥ê¥¹¥È¡ÊÇÛÎó¡Ë¤«¤é½ÅÊ£¤·¤¿Í×ÁǤòºï½ü¡¦Ãê½Ð | note.nkmk.me

newl = sorted(set(l), key=l.index)

pandas¥Æ¥£¥Ã¥×¥¹

¹ÔÁªÂò¤ÈÎóÁªÂò

¹Ô°ÌÃÖÁªÂò

¹Ô¤ÎÃÍÁªÂò¡¢Îó¤ÎÃÍÁªÂò

¥»¥ëÁªÂò

¹Ô±é»»¤ÈÎó±é»»

¹Ô´Ö¤Ç¤Î­¤·»»

import pandas as pd
df = pd.DataFrame([['A', 1, 3], ['B', 2, 4], ['C', 3, 5], ['D', 4, 6]], \
                  columns=['a', 'b', 'c'])
print(df)
#   a  b  c
#0  A  1  3
#1  B  2  4
#2  C  3  5
#3  D  4  6

print(df[df.index.isin([2, 3])].sum())  # ¥ê¥¹¥È¤Ë¤¢¤ë¹Ô¤ò­¤¹
#a    CD
#b     7
#c    11
#dtype: object

print(df.loc[0:1].sum())  # Ϣ³¤·¤¿¹Ô¤ò­¤¹
#a    AB
#b     3
#c     7
#dtype: object

print(df.loc[[0,2]].sum()) # (¥ê¥¹¥È¤Ë¤¢¤ë¡Ë¤È¤Ó¤È¤Ó¤Î¹Ô¤Ç¤âOK
#a    AC
#b     4
#c     8
#dtype: object

¤ª¤Þ¤±¡¢Ê¿¶Ñ

# Column 'a' holds strings, so restrict the mean to numeric columns explicitly;
# pandas >= 2.0 no longer drops non-numeric columns silently and raises instead.
print(df[df.index.isin([2, 3])].mean(numeric_only=True))
#b    3.5
#c    5.5
#dtype: float64

print(df.loc[0:1].mean(numeric_only=True))
#b    1.5
#c    3.5
#dtype: float64

pandas DataFrame¤Ç¿ôÃͤηå¿ô¤ò´Ý¤á¤ë¤Ë¤Ï

2Îó´Ö¤Ç¤Î±é»»

Pandas¤ÇÊ£¿ô¤ÎÎó¤òÃͤò¤â¤È¤Ë¡¢¿·¤·¤¤Îó¤òǤ°Õ¤Î´Ø¿ô¤ÇÄêµÁ¤¹¤ëÊýË¡ | Shikoan's ML Blog

import pandas as pd

# Small sample table used to demonstrate building a row filter from two columns.
comike = pd.DataFrame(
    {
        "block": ["AX", "AY", "¤¢Z", "¤¢X", "¥¤Q", "¥¤R"],
        "number": [1, 1, 10, 11, 12, 13],
        "side": ["aX", "bX", "aY", "bZ", "aQ", "bR"],
    }
)
print(comike)
# Alternative use of apply(axis=1): derive a new column from several columns.
#comike["space"] = comike.apply(lambda x: f"{x['block']}-{x['number']:02d}{x['side']}", axis=1)

# Row-wise predicate: keep rows whose 'block' and 'side' share the same second character.
same_second_char = comike.apply(
    lambda row: row["side"][1] == row["block"][1],
    axis=1,
)
comike = comike[same_second_char]
print(comike)

ÃÍ¡¦¹Ô̾/Îó̾¤Ç¥½¡¼¥È¤¹¤ë

pandas.DataFrame, Series¤ò¥½¡¼¥È¤¹¤ësort_values, sort_index | note.nkmk.me

Ãͤǥ½¡¼¥È¤¹¤ë

df.sort_values()
df.sort_values(ascending=False)

¹Ô̾¤Ç¥½¡¼¥È¤¹¤ë

df.sort_index()

Îó̾¤Ç¥½¡¼¥È¤¹¤ë

df.sort_index(axis=1)

if¤Î¤¢¤ëÆâÊñ

even_list = [i for i in range(10) if i % 2 == 0]
even_list = [i if i % 2 == 0 else "odd" for i in range(10)]

»°¹à±é»»»Ò¡Ê¾ò·ï¼°¡Ë

Python¤Î»°¹à±é»»»Ò¡Ê¾ò·ï±é»»»Ò¡Ë¤Çifʸ¤ò°ì¹Ô¤Ç½ñ¤¯ | note.nkmk.me
6. ¼° (expression) — Python 3.8.0 ¥É¥­¥å¥á¥ó¥È¡¡6.12. ¾ò·ï¼° (Conditional Expressions)

result = a * 2 if a % 2 == 0 else a * 3

elif¤ò´Þ¤á¤¿¤¤»þ¡Ê¿ä¾©¤»¤º¡Ë

result = 'negative' if a < 0 else 'positive' if a > 0 else 'zero'

Í×Áǥǡ¼¥¿¤ÎÃÖ¤­´¹¤¨ replace

pandas.DataFrame, Series¤ÎÍ×ÁǤÎÃͤòÃÖ´¹¤¹¤ëreplace | note.nkmk.me

df = df.replace(oldvalue, newvalue)

Àµµ¬É½¸½¤ò»È¤Ã¤ÆÃÖ¤­´¹¤¨¤ò»ØÄꤹ¤ë¤³¤È¤¬¤Ç¤­¤ë¡£

print(df.replace('(.*)li(.*)', r'\1LI\2', regex=True))

¤Î¤è¤¦¤Ë¡¢regex=True¤Ç»ØÄꤹ¤ë¡£¡Ê¥Ç¥Õ¥©¥ë¥È¤ÏFalse¡Ë
Àµµ¬É½¸½¤Î»ØÄê¤Î»ÅÊý¤Ï¡¢¤¿¤È¤¨¤ÐPython¤ÎÀµµ¬É½¸½¥â¥¸¥å¡¼¥ëre¤Î»È¤¤Êý¡Êmatch¡¢search¡¢sub¤Ê¤É¡Ë | note.nkmk.me¤ò»²¾È¡£

ÆâÊñ¤Î£²½Å¥ë¡¼¥×

Python ¤Î¥Í¥¹¥È¤·¤¿ÆâÊñɽµ­ - Qiita

[(x, y) for x in [1, 2] for y in ['a', 'b', 'c']]

y¤¬Æâ¦¥ë¡¼¥×¡¢x¤¬³°Â¦¥ë¡¼¥×¤Ë¤Ê¤ë¤³¤È¤ËÃí°Õ¡£

¤â¤¦£±¤Ä¤Î¡Ê¾ò·ïÉÕ¤­¤Î¡ËÍ×Áǥǡ¼¥¿¤ÎÃÖ¤­´¹¤¨¡¡where

¾ò·ïÉÕ¤­¤ÎÍ×Áǽñ´¹¤¨¤Ï¡¢where¤Ç½ñ¤¯¤³¤È¤¬¤Ç¤­¤ë¡£

pandas¤Ç¾ò·ï¤Ë±þ¤¸¤ÆÃͤòÂåÆþ¡Êwhere, mask¡Ë | note.nkmk.me

where¤ò»È¤Ã¤Æ¡¢Â裳°ú¿ô¤Ë¿·¤·¤¤Ãͤò»ØÄꤹ¤ë¤³¤È¤Ë¤è¤Ã¤Æ¡¢½ñ¤­´¹¤¨¤ë¤³¤È¤¬¤Ç¤­¤ë¡£

£±Îó'A'¤ò½ñ¤­´¹¤¨¤ëÎã

df['D'] = df['A'].where(df['C'] == 'a', 100)     # Äê¿ô¤ò½ñ¹þ¤à
df['D'] = df['A'].where(df['C'] == 'a', df['B']) # df['B']¤«¤éÃͤò»ý¤Ã¤Æ¤¯¤ë

⤷¡¢¡Ö½ñ´¹¤¨¡×Áàºî¤È¤·¤Æ¸«¤ë¤È¾ò·ï»ØÄê¤ÏµÕ¡ÊTrue¤À¤È½ñ¤­´¹¤¨¤Ê¤¤¡Ë¤Ë¤Ê¤Ã¤Æ¤¤¤ë¤Î¤ÇÃí°Õ¡£¤½¤ÎÍýͳ¤Ï¡¢where¤¬¡ÊÂ裳°ú¿ô¤ò»ØÄꤷ¤Ê¤¤¤È¡Ë¾ò·ï¤¬True¤ÎÍ×ÁǤÀ¤±¤ò¸µ¤ÎÃͤòÈ´¤­½Ð¤·¡¢False¤ÎÍ×ÁǤÏNaN¤òÊÖ¤¹¤è¤¦¤Ê¸µ¡¹¤Îµ¡Ç½¤À¤«¤é¡£¤Ä¤Þ¤êTrue¤Ï¤½¤Î¤Þ¤Þ¤Ç¡¢False¤À¤È²¿¤«¤¹¤ë¤¬¡¢¤½¤Î¡Ö²¿¤«¡×¤¬Â裳°ú¿ô¤¬¤¢¤ì¤Ð¤½¤ÎÃÍ¡¢Â裳°ú¿ô¤¬»ØÄꤵ¤ì¤Æ¤¤¤Ê¤±¤ì¤ÐNaN¤È¤Ê¤ë¡£

¥Ç¡¼¥¿¥Õ¥ì¡¼¥àÁ´ÂΤǽñ¤­´¹¤¨¤ëÎã

df2 = df.where(df < 0, 100)
df2 = df.where(df < 0, df * 2)

Îó̾¤ÎÉÕ¤±Âؤ¨¡¡rename

pandas.DataFrame¤Î¹Ô̾¡¦Îó̾¤ÎÊѹ¹ | note.nkmk.me

df = df.rename(columns={µì̾: ¿·Ì¾, µì̾2: ¿·Ì¾2, ...})

¤Ê¤ª¡¢index¤ËÂФ¹¤ë̾Á°¤òÉÕ¤±¤ë¤Ë¤Ï

df.index.name = colname1

ƱÍͤˡ¢

df.columns.name = colname2

¤Ä¤Þ¤ê

import pandas as pd
df = pd.DataFrame([[1,2,3],[4,5,6],[7,8,9]], columns=['A','B','C'])
print(df)
df.index.name='¥¤¥ó¥Ç¥Ã¥¯¥¹'
print(df)
df.columns.name = '¥«¥é¥à'
print(df)

¤È¤¹¤ë¤È¡¢²¿¤â¤Ä¤±¤Ê¤¤¤È¤­¤Ï

   A  B  C
0  1  2  3
1  4  5  6
2  7  8  9

index¤Ë̾Á°'¥¤¥ó¥Ç¥Ã¥¯¥¹'¤òÉÕ¤±¤ë¤È

        A  B  C
¥¤¥ó¥Ç¥Ã¥¯¥¹         
0       1  2  3
1       4  5  6
2       7  8  9

¹¹¤Ë¡¢column¤Ë̾Á°'¥«¥é¥à'¤òÉÕ¤±¤ë¤È

¥«¥é¥à     A  B  C
¥¤¥ó¥Ç¥Ã¥¯¥¹         
0       1  2  3
1       4  5  6
2       7  8  9

¤È¤Ê¤ë¡£

インデックスの付け替え reset_indexとset_index

[[pandas.DataFrame, Series¤Î¥¤¥ó¥Ç¥Ã¥¯¥¹¤ò¿¶¤êľ¤¹reset_index | note.nkmk.me https://note.nkmk.me/python-pandas-reset-index/]]

# DataFrame has no drop_index(); the index is reset with reset_index().
df = df.reset_index()  # the old index is re-inserted as a new column; raises if that column name already exists
df = df.reset_index(drop=True)  # discard the old index entirely (it is not kept as a column)
df = df.set_index('Îó̾')  # use the named column as the index; it is removed from the columns
df = df.reset_index().set_index('Îó̾')  # switch the index while keeping the old one as a column

Îó°ÌÃÖ¤ÎÆþ¤ìÂØ¤¨

df = df.loc[:, ["col3", "col1", "col2", "col0"]]

¹Ô¡¦Îó¤Îdrop

pandas.DataFrame¤Î¹Ô¡¦Îó¤ò»ØÄꤷ¤Æºï½ü¤¹¤ëdrop | note.nkmk.me

df = df.drop('¹Ô̾', axis=0)   # axis=0 (rows) is the default
df = df.drop(index='¹Ô̾')     # same thing, using the keyword form
df = df.drop('Îó̾', axis=1)   # axis=1 drops a column
df = df.drop(columns='Îó̾')   # keyword form; the keyword is `columns` (plural), not `column`

Ê£¿ô¹Ô¡¦Îó¤Î¾ì¹ç

df = df.drop(index=['¹Ô̾1', '¹Ô̾2'])  # pass a list to drop several labels at once

¹ÔÈÖ¹æ¤Ç»ØÄê

df = df.drop(index=df.index[[1, 3, 5]])  # index an Index with a list of positions to get the labels to drop

¤Ê¤ª¡¢Ãͤò¾ò·ï¤Ë¤·¤Æ¹Ô¤òºï½ü¡¿»Ä¤¹¤Î¤Ï¡¢

df = df[df['»²¾È'] >= 3]
# Combine element-wise conditions with & / | (each side parenthesised);
# Python's `and` / `or` raise "truth value of a Series is ambiguous".
df = df[(df['»²¾È£±'] >= 3) & (df['»²¾È£²'] != '')]

¤Ç¤Ç¤­¤ë¡£

Îó̾¤¬½ÅÊ£¤·¤Æ¤·¤Þ¤Ã¤¿»þ¤ÎÎó¤Îºï½ü

import pandas as pd
df = pd.DataFrame([['A',1, 0.1], ['B', 2, 0.2], ['C', 3, 0.3], ['D', 4, 0.4]], 
columns=['¥é¥Ù¥ë', 'ÃÍ', 'ÃÍ'])
print(df)
#  ¥é¥Ù¥ë  ÃÍ    ÃÍ
#0   A  1  0.1
#1   B  2  0.2
#2   C  3  0.3
#3   D  4  0.4

dfx = df[['¥é¥Ù¥ë', 'ÃÍ']]    # ÃͤÏ2¤Ä»Ä¤ë
print(dfx)
#  ¥é¥Ù¥ë  ÃÍ    ÃÍ
#0   A  1  0.1
#1   B  2  0.2
#2   C  3  0.3
#3   D  4  0.4

dfx = df.drop('ÃÍ', axis=1)   # £²¤Ä¤È¤âdrop¤¹¤ë
print(dfx)
#  ¥é¥Ù¥ë
#0   A
#1   B
#2   C
#3   D

dfx = df.drop(df.columns[[2]], axis=1)  # °ÌÃÖ£²¤ò»ØÄꤷ¤Æ¤â£²¤Ä¤È¤âdrop¤¹¤ë
print(dfx)
#  ¥é¥Ù¥ë
#0   A
#1   B
#2   C
#3   D

dfx = df.copy()   # Àõ¤¤¥³¥Ô¡¼¤À¤ÈÎó̾½ñ´¹¤¨¤Ïdf¤ÎÊý¤Ë¤âµÚ¤Ö¤Î¤ÇÃí°Õ
dfx.columns = ['¥é¥Ù¥ë', 'ÃÍ', 'ÃÍ£²']   # ¤¹¤Ù¤Æ»ØÄꤹ¤ì¤ÐÎó̾¤Ï¤¹¤Ù¤Æ½ñ¤­ÊѤ¨¤é¤ì¤ë
dfx = dfx[['¥é¥Ù¥ë', 'ÃÍ']]   # ÃÍ£²¤òdrop¤¹¤ì¤Ð¤¤¤¤
print(dfx)
#  ¥é¥Ù¥ë  ÃÍ
#0   A  1
#1   B  2
#2   C  3
#3   D  4

dfx = df.rename(columns={'ÃÍ': 'ÃÍ£³'})  # ξÊý¤È¤â½ñ¤­´¹¤¨¤é¤ì¤ë
print(dfx)
#  ¥é¥Ù¥ë  ÃÍ£³   ÃÍ£³   <-- ξÊý¤È¤â'ÃÍ£³'¤Ë¤Ê¤ë¤Î¤Ç¥À¥á
#0   A   1  0.1
#1   B   2  0.2
#2   C   3  0.3
#3   D   4  0.4

·ëÏÀ¤È¤·¤Æ¡¢Îó̾¤òÁ´ÌÌŪ¤Ë½ñ´¹¤¨¤¹¤ë¤·¤«¤Ê¤µ¤½¤¦¤À¡£

MultiIndex¤Þ¤ï¤ê

MultiIndex¤ÎDF/Series¤«¤éÍ×ÁǤò»ØÄꤹ¤ë¤Ë¤Ï¡¡¢Í¡¡xs()¥á¥½¥Ã¥É¤¬¸«¤ä¤¹¤¤

pandas¤ÎMultiIndex¤«¤éǤ°Õ¤Î¹Ô¡¦Îó¤òÁªÂò¡¢Ãê½Ð | note.nkmk.me

print(df.xs(¥¤¥ó¥Ç¥Ã¥¯¥¹Ì¾, level='¥ì¥Ù¥ë̾))

¤¿¤È¤¨¤Ð

                         val_1  val_2
level_1 level_2 level_3              
A0      B0      C0          98     90
                C1          44      9
        B1      C2          39     17
                C3          75     71
A1      B2      C0           1     89
                C1          54     60
        B3      C2          47      6
                C3          16      5
A2      B0      C0          75     22
                C1          19      4
        B1      C2          25     52
                C3          57     40

¤ËÂФ·¤Æ¡¢

print(df.xs('B1', level='level_2'))  # level_2¤¬'B1'¤Ç¤¢¤ë¹Ô¤òÁªÂò

¤È¤¹¤ë¤È

#                  val_1  val_2
# level_1 level_3              
# A0      C2          39     17
#         C3          75     71
# A2      C2          25     52
#         C3          57     40

pandas¤ÎMultiIndex¤«¤éǤ°Õ¤Î¹Ô¡¦Îó¤òÁªÂò¡¢Ãê½Ð | note.nkmk.me

#multindex
import pandas as pd

# Create DF
columns = ['level_1','level_2','level_3','val_1','val_2']
l = \
[['A0',      'B0',      'C0',     98,     90],
 ['A0',      'B0',      'C1',     44,      9],
 ['A0',      'B1',      'C2',     39,     17],
 ['A0',      'B1',      'C3',     75,     71],
 ['A1',      'B2',      'C0',      1,     89],
 ['A1',      'B2',      'C1',     54,     60],
 ['A1',      'B3',      'C2',     47,      6],
 ['A1',      'B3',      'C3',     16,      5],
 ['A2',      'B0',      'C0',     75,     22],
 ['A2',      'B0',      'C1',     19,      4],
 ['A2',      'B1',      'C2',     25,     52],
 ['A2',      'B1',      'C3',     57,     40],
 ['A3',      'B2',      'C0',     64,     54],
 ['A3',      'B2',      'C1',     27,     96],
 ['A3',      'B3',      'C2',    100,     77],
 ['A3',      'B3',      'C3',     22,     50]]
df = pd.DataFrame(l, columns=columns)
print(df)
df_m = df.set_index(['level_1', 'level_2', 'level_3'])  # muyltiIndex²½
print()
print(df_m)
print()
print(df_m.xs('B1', level='level_2'))  # xs¤Ë¤è¤ë¥¢¥¯¥»¥¹
print()

df_T = df_m.T    # žÃÖ¤ÇmultiColumn²½
print(df_T)
print()
print(df_T.xs('B1', level='level_2', axis=1))  # xs¤Ïaxis=1¤ÇmultiColumn¤ËÂбþ

·ë²Ì¤Ï

   level_1 level_2 level_3  val_1  val_2
0       A0      B0      C0     98     90
1       A0      B0      C1     44      9
2       A0      B1      C2     39     17
3       A0      B1      C3     75     71
4       A1      B2      C0      1     89
5       A1      B2      C1     54     60
6       A1      B3      C2     47      6
7       A1      B3      C3     16      5
8       A2      B0      C0     75     22
9       A2      B0      C1     19      4
10      A2      B1      C2     25     52
11      A2      B1      C3     57     40
12      A3      B2      C0     64     54
13      A3      B2      C1     27     96
14      A3      B3      C2    100     77
15      A3      B3      C3     22     50

                         val_1  val_2
level_1 level_2 level_3              
A0      B0      C0          98     90
                C1          44      9
        B1      C2          39     17
                C3          75     71
A1      B2      C0           1     89
                C1          54     60
        B3      C2          47      6
                C3          16      5
A2      B0      C0          75     22
                C1          19      4
        B1      C2          25     52
                C3          57     40
A3      B2      C0          64     54
                C1          27     96
        B3      C2         100     77
                C3          22     50


                  val_1  val_2
level_1 level_3              
A0      C2          39     17
        C3          75     71
A2      C2          25     52
        C3          57     40

level_1  A0              A1              A2              A3             
level_2  B0      B1      B2      B3      B0      B1      B2       B3    
level_3  C0  C1  C2  C3  C0  C1  C2  C3  C0  C1  C2  C3  C0  C1   C2  C3
val_1    98  44  39  75   1  54  47  16  75  19  25  57  64  27  100  22
val_2    90   9  17  71  89  60   6   5  22   4  52  40  54  96   77  50

level_1  A0      A2    
level_3  C2  C3  C2  C3
val_1    39  75  25  57
val_2    17  71  52  40

MultiIndex¤ò¥Õ¥é¥Ã¥È²½¤¹¤ë»þ¤Ë

pandas¤ÎMultiindex¤Î»ØÄꡦÄɲᦲò½ü¡¦¥½¡¼¥È¡¦¥ì¥Ù¥ëÊѹ¹ | note.nkmk.me

´ðËÜ¤Ï reset_index() ¤ÇºÑ¤à¤Î¤À¤¬¡¢Îó̾¤ò¥«¥Ã¥³¤è¤¯¤Ä¤±¤ë¤³¤È¤â²Äǽ¡£ pandas¤Î¥Þ¥ë¥Á¥«¥é¥à¤ò¤¤¤¤´¶¤¸¤Ë½èÍý¤¹¤ëtips - Qiita

df.columns.values

¤ò»È¤¦¤È¡¢Îó̾¤¬¥¿¥×¥ë¤Î¥ê¥¹¥È¤Ë¤Ê¤Ã¤Æ½Ð¤Æ¤¯¤ë¡£

array([('score', 'count'), ('score', 'max'), ('score', 'min'),
       ('score', 'mean'), ('score', 'std')], dtype=object)

¤Î¤Ç¤³¤ì¤ò»È¤Ã¤Æ

def get_converted_multi_columns(df, capitalize=False, sep='_'):
    """Return flat column names built from the (possibly multi-level) columns of *df*.

    Each entry of ``df.columns.values`` is expected to be a tuple of level
    labels, e.g. ``('score', 'max')``.

    Args:
        df: DataFrame whose columns are to be flattened.
        capitalize: If False (default), join the levels with *sep*
            (``'score_max'``).  If True, append each lower level capitalised
            to the first one (``'scoreMax'``).
        sep: Separator used when *capitalize* is False.

    Returns:
        A list of strings, one per column, in column order.
    """
    names = []
    for col in df.columns.values:
        if not isinstance(col, tuple):
            # Already a flat label: nothing to join.
            names.append(str(col))
            continue
        # Level labels may be non-strings (e.g. ints), so convert before joining.
        head, *tail = (str(part) for part in col)
        if capitalize:
            names.append(head + ''.join(part.capitalize() for part in tail))
        else:
            names.append(sep.join([head, *tail]))
    return names

¤Ê¤É¤È¤Ç¤­¤ë¡£¤³¤ì¤òreset_index¤·¤¿¸å¤Ç»È¤¦¤È¤¤¤¤¤Î¤Ç¤Ï¡©

Series¤«¤éDataFrame¤ØÊÑ´¹

s = pd.Series(['A','B','C'])
df = pd.DataFrame([s])
print(df)
#    0  1  2
# 0  A  B  C

¤½¤³¤ÇžÃÖ¤¹¤ë¤È

df = pd.DataFrame([s]).T
print(df)
#    0
# 0  A
# 1  B
# 2  C

¤â¤¦¾¯¤·Îã¤ò¡§

#multindex
import pandas as pd
# Create DF
columns  = ['level_1','level_2','level_3','val_1','val_2', 'val_3']

l = \
[[ 'X0',      'Y0',      'Z0',      'U',      'U',    'V'],
 [ 'X1',      'Y1',      'Z1',      'P',      'Q',    'P'],
 [ 'A0',      'B0',      'C0',     98,     90,    5],
 [ 'A0',      'B0',      'C1',     44,      9,    4],
 [ 'A0',      'B1',      'C2',     39,     17,    3],
 [ 'A0',      'B1',      'C3',     75,     71,    2],
 [ 'A1',      'B2',      'C0',      1,     89,    1],
 [ 'A1',      'B2',      'C1',     54,     60,    9],
 [ 'A1',      'B3',      'C2',     47,      6,    8],
 [ 'A1',      'B3',      'C3',     16,      5,    7],
 [ 'A2',      'B0',      'C0',     75,     22,    6],
 [ 'A2',      'B0',      'C1',     19,      4,    51],
 [ 'A2',      'B1',      'C2',     25,     52,    52],
 [ 'A2',      'B1',      'C3',     57,     40,    53],
 [ 'A3',      'B2',      'C0',     64,     54,    54],
 [ 'A3',      'B2',      'C1',     27,     96,    55],
 [ 'A3',      'B3',      'C2',    100,     77,    56],
 [ 'A3',      'B3',      'C3',     22,     50,    57]]
df = pd.DataFrame(l, columns=columns)
print(df)
df_m = df.set_index(['level_1', 'level_2', 'level_3'])  # multiIndex²½
print()
print(df_m)
print()
print(df_m.xs('B1', level='level_2'))  # xs¤Ë¤è¤ë¥¢¥¯¥»¥¹
print()
df_T = df_m.T    # žÃÖ¤ÇmultiColumn²½
print('df_T\n', df_T)
print()
#print(df_T.xs('B1', level='level_2', axis=1))  # xs¤Ïaxis=1¤ÇmultiColumn¤ËÂбþ

df_X = df_T.set_index([('X0', 'Y0', 'Z0'), ('X1', 'Y1', 'Z1')])
print(df_X.index)
index = pd.MultiIndex.from_tuples(df_X.index, names=['First', 'Second'])
df_X.index = index
print('df_X\n', df_X)
print()
df_Y = df_X.xs('B1', level='level_2', axis=1)  # xs¤Ë¤è¤ë¥¢¥¯¥»¥¹
print('df_Y\n', df_Y)
print()
print(df_Y.index)
df_R = df_Y.reset_index(level=('X0', 'Y0', 'Z0'))
print('df_R\n', df_R)

この辺がよさそうだ。pd.MultiIndex.from_tuples()を使う

import numpy as np  # needed for np.nan below
import pandas as pd

# Build a frame whose rows AND columns are both MultiIndex objects.
index = pd.MultiIndex.from_tuples([('bird', 'falcon'),
                                   ('bird', 'parrot'),
                                   ('mammal', 'lion'),
                                   ('mammal', 'monkey')],
                                   names=['class', 'name'])
columns = pd.MultiIndex.from_tuples([('speed', 'max'),
                                     ('species', 'type')])
df = pd.DataFrame([(389.0, 'fly'),
                   ( 24.0, 'fly'),
                   ( 80.5, 'run'),
                   (np.nan, 'jump')],
                  index=index,
                  columns=columns)
print(df)

#print(df.reset_index())
#print(df.reset_index(level='class'))

# Renaming columns, variant 1: replace the whole column MultiIndex on a copy.
columns = pd.MultiIndex.from_tuples([('P', 'Q'),
                                     ('R', 'S')])
df_R = df.copy()
df_R.columns = columns
print('df_R\n', df_R)

# Variant 2: add the column under its new (tuple) name, then drop the old one.
# The renamed column ends up last.
df_RR = df.copy()
df_RR[('P', 'Q')] = df[('speed', 'max')]
df_RR = df_RR.drop(('speed', 'max'), axis=1)
print('df_RR\n', df_RR)

print()
# Move only the 'name' level of the row index back into the columns.
print('reset\n', df_RR.reset_index(level='name'))

¼­½ñ¤Îȿž

Python¤Ç¼­½ñ¤Î¥­¡¼¤ÈÃͤòÆþ¤ìÂØ¤¨¤ë | note.nkmk.me

d_swap = {v: k for k, v in d.items()}

DataFrame¤«¤é¼­½ñ

pandas.DataFrame, Series¤ò¼­½ñ¤ËÊÑ´¹¡Êto_dict¡Ë | note.nkmk.me

DataFrame¤Î2¤Ä¤ÎÎó¤Î´Ö¤Î´Ø·¸¤ò¼­½ñ¤Ë¤¹¤ë¤¿¤á¤Ë¤Ï¡¢

pair = df[['¥­¡¼Â¦', '¥Ð¥ê¥å¡¼Â¦']]
ser = pair['¥Ð¥ê¥å¡¼Â¦']
ser.index = pair['¥­¡¼Â¦']
dic = ser.to_dict()

Series/DataFrame¤«¤é¥ê¥¹¥È¤ËÊÑ´¹¤¹¤ë¤Ê¤é .values.tolist()

pandas.DataFrame, Series¤ÈPythonɸ½à¤Î¥ê¥¹¥È¤òÁê¸ß¤ËÊÑ´¹ | note.nkmk.me

Series¤ÎÃÍÉôʬ¤À¤±¤Ê¤é

s = pd.Series([0, 1, 2])
l_1d = s.values.tolist()

DataFrame¤ÎÃÍÉôʬ¤À¤±¤Ê¤é

df = pd.DataFrame([[0, 1, 2], [3, 4, 5]])
l_2d = df.values.tolist()
print(l_2d)
# [[0, 1, 2], [3, 4, 5]]

¹Ô¥é¥Ù¥ë¡Ê¹Ô̾¡Ë¤È¤â¥ê¥¹¥È¤ËÊÑ´¹¤¹¤ë¤Ë¤Ï¡¢·ë¶Éreset_index¤È¤«¤Ç¥¤¥ó¥Ç¥Ã¥¯¥¹¤òÎó¤Ë´Þ¤á¤ë

l_1d_index = s_index.reset_index().values.tolist()
print(l_1d_index)
# [['row1', 0], ['row2', 1], ['row3', 2]]

Îó¥é¥Ù¥ë¡ÊÎó̾¡Ë¤Ïreset_index¤ËÁêÅö¤¹¤ëµ¡Ç½¤¬Ìµ¤¤¤Î¤Ç¡¢1¤Ä¤ÎÊýË¡¤È¤·¤ÆÅ¾ÃÖ¤·¤Æ¤«¤éreset_index¤ò¤«¤±¤ë¡£

l_2d_index_columns = df_index.reset_index().T.reset_index().T.values.tolist()
print(l_2d_index_columns)
# [['index', 'col1', 'col2', 'col3'], ['row1', 0, 1, 2], ['row2', 3, 4, 5]]

¥é¥Ù¥ë¤À¤±¤Ê¤é¡¢df.index¡¢df.columns¤À¤¬¡¢¥ê¥¹¥È¤Ë¤¹¤ë¤¿¤á¤Ëtolist()¤·¤Æ¤ª¤¯

df.index.tolist()
df.columns.tolist()

½ÅÊ£¤·¤¿¹Ô¤òdrop¤¹¤ë

duplicated¤Èdrop_duplicates

# duplicated() returns a boolean Series (True = duplicate, False = not); keep it in its
# own variable so that df itself is not replaced by the mask.
is_dup = df.duplicated(keep='last', subset=['¤³¤ÎÎó¤Ç¤Î½ÅÊ£'])
df = df[~df.duplicated(keep='...', subset=['..', '..'])]  # keep the non-duplicates (equivalent to drop_duplicates)

df = df.drop_duplicates(keep='last')  # drop duplicated rows, keeping only the last occurrence

ʸ»úÎó¤ò´Þ¤à¹Ô¤ÎÃê½Ð

pandas¤ÇÆÃÄê¤Îʸ»úÎó¤ò´Þ¤à¹Ô¤òÃê½Ð¡Ê´°Á´°ìÃס¢Éôʬ°ìÃ×¡Ë | note.nkmk.me

´°Á´°ìÃפϡ¡==¡¡¤Ç¤è¤¤¡£

dfx = df[df['state'] == 'CA']

Éôʬ°ìÃפϡ¢in¡¡¤Ï¥À¥á¤é¤·¤¤¡£str.contains()¡¡¤ò»È¤¦¡£

dfx = df[df['name'].str.contains('li')]

⤷¡¢¥Ç¥Õ¥©¥ë¥È¤Ç¤ÏÂè1°ú¿ô¤òÀµµ¬É½¸½¤È²ò¼á¤¹¤ë¡ÊÍפ¹¤ë¤Ëre.search¤ÈƱÍ͡ˤΤǡ¢ ¥Ô¥ê¥ª¥ÉÅù¤ò´Þ¤à¾ì¹ç¤Ë¤ÏÀµ¤·¤¯¤Ê¤¤¡£²óÈò¤¹¤ë¤¿¤á¤Ë¤Ï¡¡regex=False¡¡¤ò»ØÄꤹ¤ë¤È¤è¤¤¡£

dfx = df[df['name'].str.contains('li', regex=False)]

¤Þ¤¿¡¢Í×ÁǤ¬·ç»ÃÍNaN¤Ç¤¢¤ë¾ì¹ç¡¢¥Ç¥Õ¥©¥ë¥È¤Ç¤ÏNaN¤òÊÖ¤¹¤Î¤Ç¡¢¹Ô¤òÃê½Ð¤¹¤ë¤È¥¨¥é¡¼¤Ë¤Ê¤ë¡£¥ª¥×¥·¥ç¥ó¤È¤·¤Æ¡¡na=True/False¡¡¤ò»ØÄꤹ¤ë¡£

dfx = df[df['name'].str.contains('li', na=True)]   # rows whose value is NaN are included in the result
dfx = df[df['name'].str.contains('li', na=False)]  # rows whose value is NaN are excluded from the result

なお、str.containsの他、　str.startswith　や　str.endswith　更に　str.match(〜re.matchと同様の動作）　が使える。

Í×ÁǤ¬»ØÄê¥ê¥¹¥È¤Ë´Þ¤Þ¤ì¤ë¹Ô¤ÎÃê½Ð¡¡¡Êʸ»úÎó¤Ç¤Ï¤Ê¤¯¤Æ¥ê¥¹¥È¡Ë

[[pandas.DataFrame¤Î¹Ô¤ò¾ò·ï¤ÇÃê½Ð¤¹¤ëquery | note.nkmk.me: https://note.nkmk.me/python-pandas-query/]]

´ðËÜŪ¤Ë¡¢isin¥á¥½¥Ã¥É¤ò»È¤¦¤«¡¢query¥á¥½¥Ã¥É¤ÎÃæ¤Ç in ¤ò»È¤¦¤«¡£

dfx = df[df['state'].isin(['NY', 'TX'])]

¤«

dfx = df.query('state in ["NY", "TX"]')

¤³¤ì¤â²Ä¡§

dfx = df.query('state == ["NY", "TX"]')

Groupby

pandas.DataFrame¤òGroupBy¤Ç¥°¥ë¡¼¥Ô¥ó¥°¤·Åý·×Î̤ò»»½Ð | note.nkmk.me

grouped = df.groupby('¥°¥ë¡¼¥×²½¤·¤¿¤¤Îó̾')

¤³¤Î·ë²Ì¤ËÂФ·¤ÆÅý·×±é»»¤¬¤Ç¤­¤ë

df = grouped.sum()

»È¤¨¤ë¤Î¤Ïsum(), mean(), min(), max(), std(), var()¤Ê¤É

¤Þ¤¿¡¢agg()¤ò»È¤Ã¤Æ¡¢´Ø¿ô¤òŬÍѤ¹¤ë¤³¤È¤¬¤Ç¤­¤ë¡£´Ø¿ô¤ÏSeries¤ò¼õ¤±¼è¤Ã¤Æ object¤òÊÖ¤¹´Ø¿ô¤Ç¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£

df = grouped.agg(lambda x: type(x))['sl']  # shows what each group hands to the function
df = grouped.agg('min')  # built-in reductions are best named by string

Îó¤´¤È¤Ë°Û¤Ê¤ë´Ø¿ô¤òŬÍѤ¹¤ë¤³¤È¤â²Äǽ¡£

# One dict mapping each column to its aggregation (not one dict per column).
df = grouped.agg({'Îó£±': 'min', 'Îó£²': 'max'})

Ê£¿ô¤ÎÎó¤ò¥­¡¼¤Ë¤·¤Æ¥°¥ë¡¼¥×²½¤Ç¤­¤ë¡£

gf = df.groupby(['Îó£±', 'Îó£²']).mean()

as_index=False¤ò»ØÄꤹ¤ë¤È¡ÊTrue»þ¤Ï¥­¡¼¤¬·ë²ÌDF¤Î¥¤¥ó¥Ç¥Ã¥¯¥¹¤Ë¤Ê¤ë¤¬¡Ë¡¢¥­¡¼¤Ï·ë²ÌDF¤Î¹Ô¤Î¤Þ¤Þ¤Ç»Ä¤ê¥¤¥ó¥Ç¥Ã¥¯¥¹¤Ï¸µ¤Î¤Þ¤Þ

df = pd.DataFrame({
    'city': ['osaka', 'osaka', 'osaka', 'osaka', 'tokyo', 'tokyo', 'tokyo'],
    'food': ['apple', 'orange', 'banana', 'banana', 'apple', 'apple', 'banana'],
    'price': [100, 200, 250, 300, 150, 200, 400],
    'quantity': [1, 2, 3, 4, 5, 6, 7]
})

gb = df.groupby(['city', 'food']).mean()  # as_index»ØÄê¤Ê¤·¡Ê¥Ç¥Õ¥©¥ë¥È¤ÏTrue¡Ë
print(gb)

¤ËÂФ·¤Æ

               price  quantity
city  food                   
osaka apple   100.0       1.0
      banana  275.0       3.5
      orange  200.0       2.0
tokyo apple   175.0       5.5
      banana  400.0       7.0

¤À¤¬¡¢as_index=False¤Ë¤¹¤ë¤È

gb_noindex = df.groupby(['city', 'food'], as_index=False).mean()
print(gb_noindex)

¤ËÂФ·¤Æ

     city    food  price  quantity

0  osaka   apple  100.0       1.0
1  osaka  banana  275.0       3.5
2  osaka  orange  200.0       2.0
3  tokyo   apple  175.0       5.5
4  tokyo  banana  400.0       7.0

¤Î¤è¤¦¤Ëcity¤Èfood¤ÎÎ󤬻Ĥ롣

ñ¤Ë¸Ä¿ô¤ò¥«¥¦¥ó¥È¤·¤¿¤¤¾ì¹ç¡¢²¼µ­¤Îsize()¤Ç¤â¤è¤¤¤·¡¢´Ø¿ôcount()¤ò»È¤Ã¤Æ¤è¤¤¡£
Pandas¤Ç¥Ç¡¼¥¿¤Î¸Ä¿ô¤ò¿ô¤¨¾å¤²¤ëcount´Ø¿ô¤Î»È¤¤Êý - DeepAge

dfcount = df.groupby('city').count()
print(dfcount)
#       food  price  quantity
#city                        
#osaka     4      4         4
#tokyo     3      3         3

dfcount = df.groupby('city')[['food']].count().rename(columns={'food': 'count'})
print(dfcount)
#       count
#city        
#osaka      4
#tokyo      3

Groupby¤Î¥°¥ë¡¼¥×¤ò¼è½Ð¤¹

Pandas ¤Î groupby ¤Î»È¤¤Êý - Qiita

df.groupby('city').groups

{'osaka': Int64Index([0, 1, 2, 3], dtype='int64'),
 'tokyo': Int64Index([4, 5, 6], dtype='int64')}

df.groupby('city').get_group('osaka')

city	food	price	quantity
0	osaka	apple	100	1
1	osaka	orange	200	2
2	osaka	banana	250	3
3	osaka	banana	300	4

df.groupby('city').size()

city
osaka    4
tokyo    3
dtype: int64

df.groupby('city').size()['osaka']

4

map, apply

pandas¤ÇÍ×ÁÇ¡¢¹Ô¡¢Îó¤Ë´Ø¿ô¤òŬÍѤ¹¤ëmap, applymap, apply | note.nkmk.me

Ê£¿ôdf¤òjoin

reference

DF¤Î¥ê¥¹¥È joinlist = [df1, df2, ..., dfn] ¤òÍѰդ·¡¢df¤Ëjoin¤¹¤ë¡£

dfout = df.join(joinlist)

¤³¤Î¤È¤­¡¢¥­¡¼»ØÄê¡Ê"on"¤Î»ØÄê¡Ë¤Ï¤Ç¤­¤Ê¤¤¡£¥¤¥ó¥Ç¥Ã¥¯¥¹¤ò¥­¡¼¤Ë¤·¤Æjoin¤¹¤ë¡£

¤Ä¤Þ¤ê¡¢¤¢¤é¤«¤¸¤á¥¤¥ó¥Ç¥Ã¥¯¥¹¤òÉÕ¤±Âؤ¨¤Æ¤ª¤¯É¬Íפ¬¤¢¤ë¡£

df1 = df1.set_index('column to use as key')

NaN°·¤¤

NaN¤Ïnumpy¤Înan

DF¤Ç¤Ê¤±¤ì¤Ð

if np.isnan(np.nan):
    hogehoge

DF¤Ç¤ÎnanȽÄê

Reference¢ªAPI reference ¤Îgeneral function¤Ë¤¢¤ë

df = pd.DataFrame([[1,3],[2,np.nan]])
print(df.isnull())
print(df.notnull())

¤Ê¤ª¡¢isnull()¤Ïisna()¤Îalias¡¢notnull()¤Ïnotna()¤Îalias

NoneType¤ÎȽÄê

NoneType¤ÈNaN¤Ï°ã¤¦¤é¤·¤¤¡£·ëÏÀ¤È¤·¤Æ is None ¤¬Í­¸ú

x = None
print(x is None)

[python]ÊÑ¿ô¤¬NoneType¤Ç¤¢¤ë¤«¤òȽÄꤹ¤ë | akamist blog https://akamist.com/blog/archives/3067

½é¤á¤Ë¶õ¤ÎDataFrame¤òºî¤Ã¤Æ¸å¤«¤é¥Ç¡¼¥¿¹Ô¤ò­¤¹

# Start from an empty frame that only defines the columns, then add rows one by one.
df = pd.DataFrame(columns=['A', 'B'])
for i in [0, 1, 2, 3]:
    # Assigning to a row label that does not exist yet appends the row
    # (DataFrame.append was removed in pandas 2.0).
    df.loc[len(df)] = [i, i * i]

¤¢¤È¤«¤éÎó¤ò­¤¹¤Î¤Ï¡¢½Ä¤Î¥Ç¡¼¥¿¤ÎŤµ¤¬Æ±¤¸¤Ê¤é´Êñ¤Ç¡¢¾åµ­¤Î¾ì¹ç½Ä¤¬£´¤Ê¤Î¤Ç

df['NEW'] = [3, 4, 5, 6]

Ťµ¤¬°Û¤Ê¤ë¾ì¹ç¡¢¾åµ­¤ÎÊýË¡¤Ï¥¨¥é¡¼¤Ë¤Ê¤ë¡£Series¤òºî¤Ã¤ÆÆ±¤¸¤è¤¦¤Ë½ñ¤­¤³¤á¤ÐOK.

## Test ¡Á convert list (with different length) to DF column
import pandas as pd
import numpy as np

list1 = [1, 2, 3]
list2 = [4, 5, 6, 7, 8]
list3 = [10, 11]

# ¥ê¥¹¥È¤ÎŤµ¤¬¤¹¤Ù¤ÆÆ±¤¸¤Ê¤é¤Ð¡¢Ã±½ã¤Ëdf['¿·Îó̾']=list¤ÇOK
# Ťµ¤¬°Û¤Ê¤ë¤È¥¨¥é¡¼
df = pd.DataFrame(index=[0, 1, 2, 3, 4], columns=[])
#df['A'] = list1  <-- Ťµ¤¬°ã¤¦¤Î¤Ç¥¨¥é¡¼

s = pd.Series(list1, index=[0,1,2])
df['A'] = s
print(df)  # ¤³¤ì¤À¤È¡¢ÉÔ­¤·¤¿Í×ÁÇÉôʬ¤ÏNaN
#     A
#0  1.0
#1  2.0
#2  3.0
#3  NaN
#4  NaN

df.to_excel('mytest.xlsx')  # NaN¤Î¾ì½ê¤ÏExcel¾å¤Ç¤Ï¶õÍó¤Ë¤Ê¤ë

df = df.replace(np.nan, '')  # nan¤ò¶õʸ»úÎó¤ËÃÖ´¹¤¨
print(df)
#   A
#0  1
#1  2
#2  3
#3   
#4   

df.to_excel('mytest2.xlsx')  # ¶õʸ»úÎó¤Î¾ì½ê¤ÏExcel¾å¤Ç¤Ï¶õÍó¤Ë¤Ê¤ë

collections.Counter¤Î·ë²Ì¤ò²Ã¤¨¤ë

[Python] ¥ê¥¹¥È¤ÎÍ×ÁǤοô¤ò¿ô¤¨¤ë (collections.Counter) | Hibiki Programming Notes

c1 = collections.Counter('abbcabbbccca')
c1.update({'a':1, 'b':1, 'c':1})   # c1¤ò½ñ¤­´¹¤¨¤ë¤³¤È¤ËÃí°Õ¡Ê'+'¤È°Û¤Ê¤ë¡Ë

¤Þ¤¿¤Ï

c1 = collections.Counter('abbcabbbccca')
c1.update('abc')

¤â£Ï£Ë

°ú¤¯¾ì¹ç¤Ï¡¢subtract

>>> c = Counter(a=4, b=2, c=0, d=-2)
>>> d = Counter(a=1, b=2, c=3, d=4)
>>> c.subtract(d)   # c¤ò½ñ¤­´¹¤¨¤ë¤³¤È¤ËÃí°Õ¡Ê'-'¤È°Û¤Ê¤ë¡Ë
>>> c
Counter({'a': 3, 'b': 0, 'c': -3, 'd': -6})

£°¤ä¥Þ¥¤¥Ê¥¹¤Ë¤Ê¤êÆÀ¤ë

¤Þ¤¿¡¢8.3. collections --- ¥³¥ó¥Æ¥Ê¥Ç¡¼¥¿·¿ — Python 3.6.10rc1 ¥É¥­¥å¥á¥ó¥È¤Ç¤Ï

>>> c = Counter(a=3, b=1)
>>> d = Counter(a=1, b=2)
>>> c + d                       # add two counters together:  c[x] + d[x]
Counter({'a': 4, 'b': 3})
>>> c - d                       # subtract (keeping only positive counts)
Counter({'a': 2})
>>> c & d                       # intersection:  min(c[x], d[x]) 
Counter({'a': 1, 'b': 1})
>>> c | d                       # union:  max(c[x], d[x])
Counter({'a': 3, 'b': 2})

°ú¤­»»¤Ï£°¤äÉé¤Ë¤Ê¤ë¤ÈÍ×ÁǤ¬¾Ã¤¨¤ë¡ÊCount¤À¤«¤é¡©¡Ë¤Î¤ÇÃí°Õ¡Ásubtract¤È¿¶¤ëÉñ¤¤¤¬°Û¤Ê¤ë¡£

´ðËÜŪ¤Ë¡¢¥«¥¦¥ó¥È¤¬0¤Î¹àÌܤϺï¤ëÊý¸þ¤Ëư¤¯¡£½¾¤Ã¤Æ¡¢CounterƱ»Î¤ò­¤·¤¿¤ê°ú¤¤¤¿¤ê ¤·¤¿·ë²Ì¡¢Ãͤ¬0¤Ë¤Ê¤ì¤Ðprint¤·¤Æ¤â¸«¤¨¤Ê¤¯¤Ê¤ë¡£

Counter¤Ï¤Û¤Ü¼­½ñ¤ÈƱ¤¸¤Ê¤Î¤Ç¡¢¼¡¹à¤Î¥½¡¼¥È¤¬»È¤¨¤ë¡£

¼­½ñ¤Î¥½¡¼¥È

Python¤Î¼­½ñ(dict)¤ò¥½¡¼¥È¤¹¤ëÊýË¡¤Þ¤È¤á | HEADBOOST

d = {'b': 2, 'a': 3}
print(sorted(d)) # key¤ò¼è½Ð¤·¤Æ¥½¡¼¥È
print(sorted(d.keys())) # ¾å¤ÈƱ¤¸¤³¤È
print(sorted(d.values()))
print(sorted(d.items())) # item¤ò¼è½Ð¤·¤Ækey½ç¤Ç¥½¡¼¥È
print(sorted(d.items(), key = lambda x : x[1])) # item¤ò¼è½Ð¤·¤Ævalue½ç¤Ç¥½¡¼¥È

¤³¤ì¤ò»È¤Ã¤Æ¡¢codons¿ô¤ò¿ô¤¨¤Æ½ÐÎϤòcodon̾½ç¤Ë¥½¡¼¥È

import pandas as pd
import collections
# Write a dict of counts (codon counts) into a DataFrame row.
# Given d = {'B': 1, 'C': 2} and columns ['A', 'B', 'C'],
# the desired row is A=0, B=1, C=2.
df = pd.DataFrame(columns=['A', 'B', 'C'])

zerodic = collections.Counter([])  # not usable as a template: an empty Counter has no keys at all
zerodic = collections.Counter({'A': 0, 'B': 0, 'C': 0})
print(zerodic)  # intended template: {'A': 0, 'B': 0, 'C': 0}
d = collections.Counter({'B': 1, 'C': 2})
print(zerodic + d)  # not usable: '+' drops entries whose count is 0, so 'A' disappears
#
zerodic.update({'B': 1, 'C': 2})  # update() keeps every key of the template, including zero counts
              # note that update() modifies zerodic in place
print(zerodic)
# Sort by key, keep only the values, and add them to the DataFrame as one row.
szero = sorted(zerodic.items(), key=lambda x: x[0])  # sorts the (key, value) pairs; result is a list of pairs
szerolist = [u[1] for u in szero]  # values only
print(szerolist)
ser = pd.Series(szerolist, index=df.columns)
df.loc[len(df)] = ser  # append as a new row (DataFrame.append was removed in pandas 2.0)
print(df)

read_csv¤Èto_csv

read_csv

pandas¤Çcsv/tsv¥Õ¥¡¥¤¥ëÆÉ¤ß¹þ¤ß¡Êread_csv, read_table¡Ë | note.nkmk.me
pandas.read_csv — pandas 0.25.0 documentation
codecs — Codec registry and base classes — Python 3.7.4 documentation

import pandas as pd
pd.read_csv('filename', sep='\t', header=0, skiprows=[], encoding='cp932')

¤Þ¤¿¤Ï

with open('filename', 'r', encoding='...') as fin:
    pd.read_csv(fin, ... )

¤Ê¤ª¡¢separator¤¬Ê£¿ô¶õÇò¤ò´Þ¤à¤È¤­¡¢

pd.read_csv('filename', sep=r'\s+')  # raw string: '\s' is not a valid escape in a normal string literal

¤¬»È¤¨¤ë¡£python - How to make separator in pandas read_csv more flexible wrt whitespace? - Stack Overflow

to_csv

df = ...
df.to_csv('filename', ...)

to_excel¤ÈÊ£¿ô¥·¡¼¥È½ñ½Ð¤·

pandas.DataFrame.to_excel — pandas 0.25.0 documentation

If you wish to write to more than one sheet in the workbook, it is necessary to specify an ExcelWriter object:

>>> df2 = df1.copy()
>>> with pd.ExcelWriter('output.xlsx') as writer:  # doctest: +SKIP
...     df1.to_excel(writer, sheet_name='Sheet_name_1')
...     df2.to_excel(writer, sheet_name='Sheet_name_2')

¤Þ¤¿¤Ï¡¢with¤ò»È¤ï¤Ê¤¤ÊýË¡¤È¤·¤Æ

writer = pd.ExcelWriter('output.xlsx')
...
df1.to_excel(writer, sheet_name='Sheet_name_1')
df2.to_excel(writer, sheet_name='Sheet_name_2')
...
# Required: close() writes the workbook to disk and releases the file.
# (ExcelWriter.save() was removed in pandas 2.0; close() alone is enough.)
writer.close()

read_excel¤Ç¥Ñ¥¹¥ï¡¼¥ÉÉÕ¤­¤ÎExcel¥Õ¥¡¥¤¥ë¤òÆÉ¤à¾ì¹ç

パスワード機能を直接pandas（やopenpyxl, xlrd）に取り込む話は難しいようだ。

pip install msoffcrypto-tool
import tempfile
import msoffcrypto
import pandas as pd
from pathlib import WindowsPath
file_dir = WindowsPath(r"¥¢¥ó¥±¡¼¥È¤¬Æþ¤Ã¤¿¥Ç¥£¥ì¥¯¥È¥ê¤Î¥Ñ¥¹")
# ²óÅú¥Õ¥¡¥¤¥ë¤ò½ç¼¡³Îǧ
for file in file_dir.glob("*.xlsm"):
    # ¥Ñ¥¹¥ï¡¼¥É²ò½ü¤·¤¿¥Æ¥ó¥Ý¥é¥ê¥Õ¥¡¥¤¥ëºîÀ®
    with file.open("rb") as f, tempfile.TemporaryFile() as tf:
        office_file = msoffcrypto.OfficeFile(f)
        office_file.load_key(password="¥Ñ¥¹¥ï¡¼¥É")
        office_file.decrypt(tf)
        # ¥Æ¥ó¥Ý¥é¥ê¥Õ¥¡¥¤¥ë¤«¤é²óÅú¤ò¥í¡¼¥É
        df = pd.read_excel(tf, header=None)

read_excel¤Ç³¬ÁØ¥«¥é¥à¤òÆÉ¤à¾ì¹ç

pandas¤ÇÊ£¿ô¹Ô¥Ø¥Ã¥À¡¼¤ò»ý¤ÄExcel¥Õ¥¡¥¤¥ë¤òÆÉ¹þ¤ß¥«¥é¥à(columns)¤òÀ°·Á¤¹¤ë - Qiita

³¬ÁØ¥«¥é¥à¤ÎÎã.png

df = pd.read_excel(filename, sheet_name='xxx', skiprows=y, header=[u, v, w])

¤³¤ì¤Ç¡¢¤Þ¤ºÀèÆ¬y¹Ôʬ¤ò¥¹¥­¥Ã¥×¤·¤¿¸å¡¢header¤ÇÎó̾Éôʬ¤òÆÉ¤à¤¬¡¢¡Ê¥¹¥­¥Ã¥×¤·¤¿¸å¤Î¡Ëu¹ÔÌÜ¡¢v¹ÔÌÜ¡¢w¹ÔÌܤò¤½¤ì¤¾¤ìÂ裱³¬ÁØ¡¦Â裲³¬ÁØ¡¦Â裳³¬ÁؤÎÎó̾¤È¤¹¤ë¡£

³¬ÁØ¥«¥é¥à¤ò¥Õ¥é¥Ã¥È²½¤¹¤ë

python - pandas DataFrame¤Ç³¬ÁØÎ󥤥ó¥Ç¥Ã¥¯¥¹¤ò¥Õ¥é¥Ã¥È²½¤¹¤ë¤Ë¤Ï¤É¤¦¤¹¤ì¤Ð¤è¤¤¤Ç¤¹¤«¡© - IT¥Ä¡¼¥ë¥¦¥§¥Ö

   A     B   
   a  b  a  b
0  0  1  2  3
1  4  5  6  7

¤ò

   Aa  Ab  Ba  Bb
0   0   1   2   3
1   4   5   6   7

¤Î¤è¤¦¤Ë¤·¤¿¤¤¡¢¤È¸À¤¦¾ì¹ç¡£

Îó̾¤ò½ñ¤­Ä¾¤¹¤È¤¹¤ì¤Ð¡¢

df.columns = df.columns.map(''.join)

Ëü¤¬°ìÎó̾¤¬Ê¸»úÎó¤Ç¤Ê¤¤¾ì¹ç¤Ï¡¢Ê¸»úÎó¤ËÊÑ´¹

df.columns = df.columns.map(lambda x: ''.join([*map(str, x)]))

¤ò¤·¤Æ¤«¤éjoin¤¹¤ëɬÍפ¬¤¢¤ë¡£

to_excel¤Ç¡¢¥»¥ëÆâ¤Ë²þ¹Ô¤òÃÖ¤­¤¿¤¤¾ì¹ç

¥Ç¥Õ¥©¥ë¥È¤Ç¤Ï²þ¹Ô¤ÏÉԲġÊ\n¤òÆþ¤ì¤Æ¤â¸ú²Ì¤Ê¤¤¡Ë

¥»¥ë¤Î¥¹¥¿¥¤¥ë¥×¥í¥Ñ¥Æ¥£¤Ç¡¢wrap_text=True¤òÀßÄꤹ¤ë¡£

·ë¶É¡¢to_excel¤Ç¥Õ¥¡¥¤¥ë¤òºî¤Ã¤Æ¤·¤Þ¤Ã¤Æ¤«¤é¡¢¤â¤¦°ìÅÙopenpyxl¤Ç¥Õ¥¡¥¤¥ë¤ò¥ª¡¼¥×¥ó¤·¤Æ¡¢ ¥·¡¼¥ÈÆâ¤Î¤¹¤Ù¤Æ¤Î¥»¥ë¤Î¥×¥í¥Ñ¥Æ¥£¤ò¾åµ­£³ÈÖÌܤÇÊѹ¹¤¹¤ë¤Î¤¬´Êñ¤½¤¦¡£

¤Ê¤ª¡¢Êѹ¹¤ÎÊýË¡¤Ï¡¢

cell.alignment =  cell.alignment.copy(wrapText=True)

はDeprecatedのwarningが出るので、

cell.alignment = Alignment(wrap_text=True)

¤Ë¤·¤Æ¤ß¤¿¡£

Á´ÂΤϡ¢

# "to_Excel"¤Ç¥»¥ëÆâ²þ¹Ô¥¢¥ê¤¬²Äǽ¤«¡©
import pandas as pd
df = pd.DataFrame([['5\n4' ,3], [2,6], [8,5]])   # ¥Ç¡¼¥¿Ãæ¤Ë\n¤òÆþ¤ì¤ë
print(df)
df.to_excel("mytest2.xlsx")    # ¤È¤Ë¤«¤¯DF¤Î¥Ç¡¼¥¿¤«¤éexcel¥Õ¥¡¥¤¥ë¤òºî¤Ã¤Æ¤·¤Þ¤¦

import openpyxl
from openpyxl.styles import Alignment
wb = openpyxl.load_workbook('mytest2.xlsx')
ws = wb['Sheet1']
for row in ws.iter_rows():
    for cell in row:      
        #cell.alignment =  cell.alignment.copy(wrapText=True) # Depreciated·Ù¹ð
        #cell.style.alignment.wrap_text=True    # ¥¨¥é¡¼
        cell.alignment = Alignment(wrap_text=True)  # ¤³¤ì¤Ê¤éư¤¤¤¿
wb.save('mytest2.xlsx')    # ¥Õ¥¡¥¤¥ë¤Ø½ñ¤­½Ð¤·¡Ê½ñ¤­Ìᤷ¡Ë

¤³¤ì¤Ë¤è¤Ã¤Æ¼Â¸½¤Ç¤­¤ë¡£

matplotlibÉÁ²è½ÐÎÏ¡Êfig¥Ç¡¼¥¿¡Ë¤ò¥¤¥á¡¼¥¸¥Ç¡¼¥¿¤È¤·¤Æ°·¤¦ÊýË¡

matplotlib¤ÇÉÁ¤¤¤¿¿Þ¤ò¥¤¥á¡¼¥¸¥Ç¡¼¥¿¤Ëºî¤ë¤³¤È¤¬¤Ç¤­¤ë¡£¤¢¤Þ¤êÈþ¤·¤¯¤Ï¤Ê¤¤¤¬¡£

Python¤ÎMatplotlib¤Î¥°¥é¥Õ¤òNumPy¹ÔÎó¤ËÊÑ´¹¤·¤ÆOpenCV¤äPillow¤Ç»È¤¦ — Ë¿¥¨¥ó¥¸¥Ë¥¢¤Î¤ª»Å»ö°Ê³°¤Î¥á¥â¡Êʬºý¡Ë

from matplotlib import pyplot as plt
import numpy as np
from PIL import Image

x = np.linspace(0, 2*np.pi, 21)
y1 = np.sin(x)
y2 = np.cos(x)
y3 = y1-y2
fig, ax = plt.subplots()
ax.plot(x, y1, 'b.-')
ax.plot(x, y2, 'g,-.')
ax.plot(x, y3, 'r,-.')

fig.canvas.draw()
#im = np.array(fig.canvas.renderer.buffer_rgba())
im = np.array(fig.canvas.renderer._renderer) # matplotlib¤¬3.1¤è¤êÁ°¤Î¾ì¹ç

img = Image.fromarray(im)
img.show() # ¤³¤ì¤Ïjupyter notebook¾å¤Çư¤¤¤¿¡¡PIL¤Îim.show()¤Î½ÐÎϤÏXwin¤Ø¹Ô¤¯

¤â¤¦£±¤Ä¤ÎÎã¤Ï¡Êrenderer¤Î½ÐÎϤòʸ»ú¤Ë¤¹¤ë¡Ë How to convert a matplotlib figure to a numpy array or a PIL image | ICARE Data and Services Center

import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

# make an agg figure
fig, ax = plt.subplots()
ax.plot([1, 2, 3])
ax.set_title('a simple figure')

# The canvas must be drawn before its pixel buffer can be read.
fig.canvas.draw()

# grab the pixel buffer and dump it into a numpy array
#X = np.array(fig.canvas.renderer.buffer_rgba()) # author's note: this did not work
w, h = fig.canvas.get_width_height()
buf = np.frombuffer(fig.canvas.tostring_argb(), dtype=np.uint8)
# Pixel data is stored row by row: h rows of w pixels with 4 channels each.
buf = buf.reshape(h, w, 4)
# canvas.tostring_argb gives the pixmap in ARGB mode. Roll the ALPHA channel
# to the end to have it in RGBA mode.
buf = np.roll(buf, 3, axis=2)

# PIL takes the size as (width, height); tobytes() replaces the deprecated tostring().
im = Image.frombytes("RGBA", (w, h), buf.tobytes())
im.show()

# ¤³¤ì¤Ïjupyter notebook¾å¤Çư¤¤¤¿¡¡PIL¤Îim.show()¤Î½ÐÎϤÏXwin¤Ø¹Ô¤¯

Ê£¿ô¤Î¿Þ¡Êsubplot¡Ë¤ò£±Ëç¤Î¿Þ¤Ë¤¹¤ë»þ

subplots¤ò»È¤Ã¤Æ»ØÄê¤Ç¤­¤ë¡£

# squeeze=False keeps `ax` two-dimensional even when there is only one row or column,
# so ax[j, i] below always works.
fig, ax = plt.subplots(ncols=len(xdata), nrows=len(ydata),
                       figsize=(6 * len(xdata), 6 * len(ydata)), squeeze=False)
...
for i, x in enumerate(xdata):      # i: column position in the grid
    for j, y in enumerate(ydata):  # j: row position in the grid
        # NOTE(review): newer seaborn releases expect keyword x=/y= arguments and
        # fill= instead of shade= -- confirm against the installed version.
        sns.kdeplot(data[i], data[j], ax=ax[j, i], shade=True)
plt.show()

subplot¤Ç¿Þ¤Î´Ö³Ö¤¬¶¹¤¤¤È¤­¤Ï

subplots_adjust¤ò»È¤¦

plt.subplots_adjust(wspace=0.4, hspace=0.6)

¥Ç¥Õ¥©¥ë¥È¤Ï0.2, 0.2¤é¤·¤¤¡£Â礭¤¯¤¹¤ë¤È¹­¤¬¤ë¡£Ëç¿ô¤Ë¤è¤ë¡£

¥Ò¥¹¥È¥°¥é¥à

»¶ÉÛ¿Þ

»¶ÉۿޤγÆÅÀ¤ËÃͤò½ñ¤¯

matplotlib¤Îannotate¤ò»È¤¦

<Python, matplotlib> »¶ÉۿޤγÆÍ×ÁǤËʸ»ú¤òÉÕ¤±¤ë¡£ - ¤Í¤³¤æ¤­¤Î¥á¥â

Pandas¤Ç»¶ÉÛ¿Þ¤ò½ñ¤¯¤È¤­³ÆÍ×ÁǤΥé¥Ù¥ë¤òɽ¼¨ - Qiita

annotate¤ò»È¤¦¤Î¤Ëax¤¬É¬ÍפʤΤǡ¢¤¿¤È¤¨¤Ð¡¢

%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt

# Wide table: one row per index label, one column per category, cells are counts.
df = pd.DataFrame([[1,3,5], [2,4,6]], index=['½õ¶µ', '¶µ¼ø'], columns=['¾¯¤Ê¤¤', '¤Þ¤¢¤Þ¤¢', '¿¤¤'])

# Reshape to long form: one record (row label, column label, count) per cell.
# Building the frame in one go replaces DataFrame.append, removed in pandas 2.0.
records = [[ix, col, df.loc[ix, col]] for ix in df.index for col in df.columns]
dfx = pd.DataFrame(records, columns=['¿¦³¬', 'ɾ²Á', 'ȯÀ¸¿ô'])
print(df)
print('dfx\n', dfx)

ax = plt.subplot(1,1,1)  # create the Axes explicitly: annotate() is an Axes method
# Marker area is proportional to the count.
plt.scatter(dfx['¿¦³¬'].to_list(), dfx['ɾ²Á'].to_list(),
            s=[u*20 for u in dfx['ȯÀ¸¿ô'].to_list()])

for _, row in dfx.iterrows():
    print(row)
    # Access by column label; integer keys on a labelled Series are deprecated.
    ax.annotate(str(row['ȯÀ¸¿ô']), xy=(row['¿¦³¬'], row['ɾ²Á']), size=14)
plt.show()

Pandas¤Ç»¶ÉÛ¿Þ¤ò½ñ¤¯¤È¤­³ÆÍ×ÁǤΥé¥Ù¥ë¤òɽ¼¨ - Qiita¤Ç¤Ïax¤òºî¤ëÂå¤ê¤Ë¡¢

import pandas as pd

# DataFrame¤Ë¥Ç¡¼¥¿¤ò¥»¥Ã¥È
dd = pd.DataFrame([
        [10,50,'hoge'],
        [50,30,'fpp'],
        [20,30,'baa']
    ], columns=['ax','ay','label'])

# »¶ÉÛ¿Þ¤òÉÁ²è
a = dd.plot.scatter(x='ax',y='ay')
# ³ÆÍ×ÁǤ˥é¥Ù¥ë¤òɽ¼¨
for k, v in dd.iterrows():
    a.annotate(v[2], xy=(v[0],v[1]), size=15)

¤Î¤è¤¦¤Ë¡¢a = df.plot.scatter() ¤È¤·¤Æ¤ª¤¤¤Æ¡¢¤³¤Îa¤ËÂФ·¤Æ a.annotate()¡£

pandas¤Îplot¤Çx¼´¥é¥Ù¥ë¤Îʸ»ú¤Î¸þ¤­¤ò²óž¤µ¤»¤ë

Íç¤Îmatplotlib¤Ç¤Ï

plt.xticks(rotation=90)

¤Ê¤É¤È¤¹¤ë¡£90¤Ï±¦90ÅÙ²óž¡Ê½Ä½ñ¤­¡¢¤³¤ì¤¬¥Ç¥Õ¥©¥ë¥È¡Ë¡£²£½ñ¤­¤Ë¤¹¤ë¤Ë¤Ïrotation=0¡£

pandas¤Îplot¤Î¾ì¹ç¤Ï¡¢¾åµ­¤Îxticks¤ò²Ã¤¨¤Æ¤â¤è¤¤¤¬¡¢plot¤Î°ú¿ô¤Ë»ØÄꤹ¤ë¤³¤È¤â¤Ç¤­¤ë¡£¤¿¤È¤¨¤Ð

df.plot(kind='bar', rot=0)
df.plot.bar(rot=0)

¤¤¤º¤ì¤â¡¢³ÑÅ٤λØÄê¤Ïxticks¤ÈƱÍͤǡ¢0¤¬²£½ñ¤­¤Ë¤Ê¤ë¤¬¡¢¥Ç¥Õ¥©¥ë¥È¤Ï90¤Î½Ä½ñ¤­¡£

matplotlib¤ÇËÞÎãlegend¤Î°ÌÃÖ¤òÄ´À°¤¹¤ë

plt.legend(bbox_to_anchor=(0.005, 0.995), loc='upper left', borderaxespad=0)

¤ä

plt.legend(bbox_to_anchor=(1.005, 0.995), loc='upper left', borderaxespad=0)

ËÞÎã¤Î¥Æ¥­¥¹¥È¤òÊѹ¹¤¹¤ë

python - Modify the legend of pandas bar plot - Stack Overflow

£±Ëܤ´¤È¤Ëplt¤ò¸Æ¤ó¤Ç¤¤¤ë¤È¤­¡Êpandas¤Ç¤Ï¤Ê¤¯¤ÆÀ¸¤Îmatplotlib¤Î»þ¡Ë

plt.plot.bar(... label='¼«Ê¬¤Î¥Æ¥­¥¹¥È', ...)

¤Ç»ØÄê¤Ç¤­¤ë¡£

pandas¤ÇDataFrame¤ò°ìµ¤¤ËÉÁ¤¤¤Æ¤·¤Þ¤¦¡ÊÎ󤴤ȤËÉÁ²è¤Ê¤É¡Ë¤È¤­¤Ï¡¢£±¤ÄÌÜ¤Ï plt.legend()¤Ç¡¢¥ê¥¹¥È¤ò»ØÄê¤Ç¤­¤ë¡£

df = pd.DataFrame({'A':26, 'B':20}, index=['N'])
ax = df.plot(kind='bar')
ax.legend(["AAA", "BBB"]);

£±¤Ä¤ÎfigureÆâ¤ËÊ£¿ô¤Îax¤¬Â¸ºß¤·¤Æ¤â¤¤¤¤¡£

fig, ax = plt.subplots(nrows=1, ncols=len(years), figsize=(8*len(years), 6))
for i, year in enumerate(years):
   dfhx.plot.line(ax=ax[i])
   ax[i].legend([¿ÞiÍѤΡ¢Àþ¤Î¿ô¤ËÂбþ¤·¤¿¥ê¥¹¥È])

ËÞÎã¤Îɽ¼¨½ç½ø¤òȿž¤¹¤ë

Legend guide — Matplotlib 1.3.1 documentation

ax = subplot(1,1,1)
p1, = ax.plot([1,2,3], label="line 1")
p2, = ax.plot([3,2,1], label="line 2")
p3, = ax.plot([2,3,1], label="line 3")

handles, labels = ax.get_legend_handles_labels()

# reverse the order
ax.legend(handles[::-1], labels[::-1])

¼«Ê¬¤Î¥×¥í¥°¥é¥à¤Ç¤Ï¡¢

# df2¤ò½àÈ÷¤·¤Æ¤ª¤¯
a = df2.plot.bar(stacked=True)
plt.title(lvl1[0]+'/'+lvl2)
handles, labels = a.get_legend_handles_labels()
plt.legend(handles[::-1], labels[::-1], bbox_to_anchor=(1.005, 0.995), loc='upper left', borderaxespad=0)
pdf.savefig(bbox_inches='tight')
plt.show()

»¶ÉۿޤǾ嵭annotation¤ò²Ã¤¨¤¿¤È¤­¤Ëʸ»ú¤¬½Å¤Ê¤ë¤Î¤òËɤ°

¤¤¤ë¤«¤Î¥Ü¥Ã¥¯¥¹: Matplotlib¤Ç¥Æ¥­¥¹¥È¥é¥Ù¥ë¤ò½Å¤Ê¤é¤Ê¤¤¤è¤¦¤Ëɽ¼¨¤¹¤ë

»¶ÉÛ¿Þ¤ÇÅÀ¤Î¥é¥Ù¥ë¤òÆþ¤ì¤¿¤È¤­¤Ë¡¢¿ô¤¬Â¿¤¤¤È¡¢½Å¤Ê¤ê¹ç¤Ã¤ÆÆÉ¤á¤Ê¤¯¤Ê¤ë¤³¤È¤¬¤¢¤ë¡£¤½¤ì¤òÈò¤±¤ë¤Ë¤Ï¡¢adjust_text¥é¥¤¥Ö¥é¥ê¤¬»È¤¨¤ë¡£¡Êadjust¤Ètext¤Î´Ö¤Ë²¼Àþ¤¬¤¢¤Ã¤¿¤ê¤Ê¤«¤Ã¤¿¤ê¤Ê¤Î¤ÇÍ×Ãí°Õ¡Ë

pip install adjusttext

¤Ç¥¤¥ó¥¹¥È¡¼¥ë¤·¤¿¸å¡¢

from adjustText import adjust_text
...
texts = []
a = dfout.plot.scatter(x='½ÅÍ×ÅÙ', y='Ëþ­ÅÙ')
for k, v in dfout.iterrows():
    u = a.annotate(v[2], xy=(v[0],v[1]), size=10)
    texts.append(u)
plt.title('...')
adjust_text(texts)
plt.savefig(...)
plt.show()

¤³¤ì¤Ç¡¢¤«¤Ê¤ê¤Î¾ì¹ç¼«Æ°Åª¤Ë°ÌÃÖ¤ò·è¤á¤Æ¤¯¤ì¤ë¡£¡Ê⤷ÃÙ¤¯¤Ê¤ë¤é¤·¤¤¡Ë

Ê£¿ô¤Î¥×¥í¥Ã¥È¤¬¤¢¤ë¾ì¹ç¡¢ax¤ò»ØÄꤷ¤Æadjust_text¤ò¤¹¤ëɬÍפ¬¤¢¤ë¡£»ØÄꤷ¤Ê¤¤¤È°ÌÃ֤⤺¤ì¤ë¡£

With multiple subplots, run adjust_text for one subplot at a time

fig, axes = plt.subplots(1, 2, figsize=(8, 3))
for k, ax in enumerate(axes):
   ...
   ax.plot(x, y, 'bo')
   ...
   texts = []
   for i in range(len(x)):
       t = ax.text(x[i], y[i], 'Text%s' %i, ha='center', va='center')
       texts.append(t)
   adjust_text(texts, ax=ax)

matplotlib¤Ç¼´¤Î¥á¥â¥ê¤Îʸ»ú¤òÊѹ¹¤¹¤ë¤Ë¤Ï

matplotlib - ÌÜÀ¹¡¢ÌÜÀ¹¤Î¥é¥Ù¥ë¡¢¥°¥ê¥Ã¥É¤ÎÀßÄêÊýË¡ - Pynote

ax.set_xticklabels(¥ê¥¹¥È)¡¢ax.set_yticklabels(¥ê¥¹¥È)¤ÇÀßÄꤹ¤ì¤Ð¤è¤¤¤é¤·¤¤¡£ ⤷¥ê¥¹¥È¤Î¸Ä¿ô¤Ï¸µ¤Î¸Ä¿ô¡Ê¤Ä¤Þ¤êxticks¤Ç¤Î¸Ä¿ô¡Ë¤Ë¹ç¤ï¤»¤ëɬÍפ¬¤¢¤ê¤½¤¦¡£ ÎãÂê¤Ç¤Ï¡¢

# Set the positions of the major ticks on the x axis.
ax.set_xticks(np.linspace(0, np.pi * 4, 5))
# Set the labels of the major x ticks. Raw strings keep the LaTeX backslashes
# from being treated as (invalid) string escape sequences.
ax.set_xticklabels(["0", r"$1\pi$", r"$2\pi$", r"$3\pi$", r"$4\pi$"])

# Set the positions of the major ticks on the y axis.
ax.set_yticks(np.linspace(-1, 1, 3))
# Set the labels of the major y ticks.
ax.set_yticklabels(["A", "B", "C"])

¤Î¤è¤¦¤Ë¤·¤Æ¤¤¤ë¡£¼«Ê¬¤ÎÎã¤Ç¤Ï¡¢

a = dfhxzout.plot.line(ax=ax[i], marker='x', xticks=range(len(DHankelist[col])), \
      title=year+'ǯÆþ³Ø Æþ³Ø»þ '+col, rot=90)
a.set_xticklabels([u[3:8] for u in DHankelist[col]])

¥Ò¡¼¥È¥Þ¥Ã¥×

³¬ÁØÅª¥¯¥é¥¹¥¿¥ê¥ó¥°

import numpy as np
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.spatial.distance import pdist
import matplotlib.pyplot as plt
X = np.array([[1,2], [2,1], [3,4], [4,3]])
Z = linkage(X, 'single')  # wardË¡¤ò»È¤¦¤Ê¤é¤Ð 'single' ¤ÎÂå¤ï¤ê¤Ë 'ward' ¤ò»ØÄꤹ¤ë
dendrogram(
    Z,
    leaf_font_size=8.,  # font size for the x axis labels
)
plt.show()
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import linkage, dendrogram
df_count_tpm = pd.read_csv("count_tpm.tsv", sep="\t", index_col=0)
tpm_t = df_count_tpm.T
print(df_count_tpm.head())
from scipy.spatial.distance import pdist
linkage_result = linkage(tpm_t, method='average', metric='correlation')
plt.figure(num=None, figsize=(16, 9), dpi=200, facecolor='w', edgecolor='k')
dendrogram(linkage_result, labels=df_count_tpm.columns)
plt.show()

¥¯¥é¥¹¥¿¡¼¼«ÂΤò¼è½Ð¤·¤¿¤¤»þ¤Ï fcluster ¤¬»È¤¨¤ë¡£

import numpy as np
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster
from scipy.spatial.distance import pdist
import matplotlib.pyplot as plt
X = np.array([[1,2], [2,1], [3,4], [4,3], [1,3], [3,1]])
Z = linkage(X, 'single')  # wardË¡¤ò»È¤¦¤Ê¤é¤Ð 'single' ¤ÎÂå¤ï¤ê¤Ë 'ward' ¤ò»ØÄꤹ¤ë
plt.figure(figsize=(20,20), dpi=200, facecolor='w', edgecolor='k')
dendrogram(
    Z,
    leaf_font_size=8.,  # font size for the x axis labels
    labels = ['A', 'B', 'C', 'D', 'E', 'F']
)
plt.savefig('dendrogram.pdf')
plt.show()

NUM_CLUSTERS = 5
nodelabels = ['A', 'B', 'C', 'D', 'E', 'F']
for num in range(5, NUM_CLUSTERS+1):
    # fcluster returns, for each input observation, the id of the cluster
    # it was assigned to.
    labels = fcluster(Z, t=num, criterion='maxclust')
    print(num, labels)
    # For every cluster id, print the list of inputs that belong to it.
    for cl_id in range(1, num+1):
        members = [name for name, assigned in zip(nodelabels, labels)
                   if assigned == cl_id]
        print(' ', cl_id, members)

½ÐÎϤÏtest_dendrogram.png

fcluster¤Î½ÐÎÏ¡§

5 [1 2 3 4 1 2]  # ¥ê¥¹¥È¤Ë³ÆÍ×ÁǤΥ¯¥é¥¹¥¿id¡Ê1¡Á4¡Ë¡£Âè1Í×ÁǤÈÂè5Í×ÁǤϥ¯¥é¥¹¥¿1¤Ë¡¢Âè2¤ÈÂè6¤Ï¥¯¥é¥¹¥¿2¤Ë°¤¹¤ë

¤½¤ì¤ò½ñ¤­Ä¾¤·¤¿·ë²Ì¡§

  1 ['A', 'E']   # ¾åµ­¤ò¡¢¥¯¥é¥¹¥¿1¤Ë¤ÏÍ×ÁÇ1¤È5¤¬Â°¤¹¤ë¤ÈÊÑ·Á¤·¡¢¹¹¤ËÍ×ÁÇid¡Ê1,5¡Ë¤ò¥é¥Ù¥ë¡ÊA,E¡Ë¤ËÃÖ´¹¤¨¤¿
  2 ['B', 'F']
  3 ['C']
  4 ['D']
  5 []

k-meansË¡¤Ë¤è¤ë¥¯¥é¥¹¥¿¥ê¥ó¥°

mySQL¥¢¥¯¥»¥¹

def read_db(query, url='mysql+mysqldb://userid:password@localhost/dbname?charset=utf8'):
    """Run ``query`` against the database and return the rows as a DataFrame.

    Args:
        query: SQL text passed to ``pd.read_sql``.
        url: SQLAlchemy connection URL. The default is the placeholder URL
            used in this note; replace userid/password/dbname with real
            values or pass a URL explicitly.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_sql``.
    """
    engine = create_engine(url, echo=False)
    try:
        return pd.read_sql(query, engine)
    finally:
        # A fresh engine is created on every call, so release its connection
        # pool here instead of leaving the connections open.
        engine.dispose()

query = 'select * from tablename'
df = read_db(query)

¥Õ¥¡¥¤¥ë¤Î¸ºß

import os
path = "./sample"
os.path.exists(path)

pickle

¥×¥ì¡¼¥ó¤ÊPython¤Î¾ì¹ç

with open(picklefname, 'rb') as pf:
    df = pickle.load(pf)
with open(picklefname, 'wb') as pwf:
    pickle.dump(df, pwf)

Pandas¤ÎDataFrame¤Î¾ì¹ç¡¢¡Ê¥á¥½¥Ã¥É¤¬¤¢¤ë¡Ë

df.to_pickle(picklefname)
df = pd.read_pickle(picklefname)

¾ò·ï¼°¡Ê3¹à±é»»»Ò¡Ë

x = "OK" if n == 10 else "NG"

ʸ»úÎó¤Î¥ê¥¹¥È¤ò·Ò¤¤¤Ç£±¤Ä¤Îʸ»úÎó¤Ë¤¹¤ë

s = ''
for u in list:
   s += u

'´Ö¤ËÁÞÆþ¤¹¤ëʸ»úÎó'.join([Ï¢·ë¤·¤¿¤¤Ê¸»úÎó¤Î¥ê¥¹¥È])

s = ''.join(list)

ʸ»úÎó¤Î¸¡º÷

>>> "spam".find("pa")
1
>>> "spam".find("x")
-1

index¤Ï¸«¤Ä¤«¤é¤Ê¤¤¤È¤­¤Ë¥¨¥é¡¼¤òÊÖ¤¹

Series¤Ë2°ú¿ô¤Î´Ø¿ô¤òapply¤¹¤ë¾ì¹ç

df.x.apply(f1, args=(2,))
# ¤Þ¤¿¤Ï¡¢ apply ¤Ë¥­¡¼¥ï¡¼¥É°ú¿ô¤ÇÍ¿¤¨¤ë
df.x.apply(f1, b=2)

DFÆâ¤Î¸¡º÷

df[X].isin([list])  list¤Ç¤Ê¤±¤ì¤Ð¤Ê¤é¤Ê¤¤¡£

pandas¤ÇǤ°Õ¤Î°ÌÃÖ¤ÎÃͤò¼èÆÀ¡¦Êѹ¹¤¹¤ëat, iat, loc, iloc

https://note.nkmk.me/python-pandas-at-iat-loc-iloc/

argv, argc

import sys
# Expect exactly one command-line argument (argv[0] is the script name).
argvs = sys.argv
argc = len(argvs)
if argc != 2:
    print('Usage: python %s filename' % argvs[0])
    # quit() is an interactive helper installed by the site module and exits
    # with status 0; sys.exit(1) always exists and reports the usage error.
    sys.exit(1)

¥ê¥¹¥È¤Î¥½¡¼¥È

newls = sorted(list)

¥á¥½¥Ã¥Ésort¤Ïin-place¤Ç¥½¡¼¥È¤¹¤ë¤Î¤ÇÃí°Õ¡£

list.sort()  # list¤½¤Î¤â¤Î¤¬ÃÖ¤­´¹¤ï¤ë

¥ê¥¹¥È¤Îflatten

# Python 3 ¤Ç flatten ¤¹¤ëÊýË¡¤¤¤í¤¤¤í

# https://qiita.com/hoto17296/items/e1f80fef8536a0e5e7db

sum([[1,2,3],[4,5,6],[7,8,9]], [])  #=> [1, 2, 3, 4, 5, 6, 7, 8, 9]

Â裲°ú¿ô¤Î[]¤¬É¬Íס£¤³¤ì¤Ï¡¢Â裲°ú¿ô¤Î¥Ç¥Õ¥©¥ë¥ÈÃͤ¬0¤Ê¤Î¤Ç¿ôÃͤΡܤÀ¤È»×¤¤¹þ¤à¤«¤é¡£

l = [['A', 'B', 'C'], ['D', 'E'], ['F']]
print(sum(l, []))

¥°¥í¡¼¥Ð¥ëÊÑ¿ô

https://uxmilk.jp/12505

var = 1


def add_ten(x):
    """Rebind the module-level ``var`` to 10 and print ``var + x``."""
    # Without ``global`` the assignment below would create a new local
    # variable instead of changing the module-level one.
    global var
    var = 10
    print(var + x)


add_ten(2)    # 12
print(var)    # 10

¥ê¥¹¥È¤Îº¹Ê¬

https://python.ms/sub/optimization/if-vs-try/list-deletion/#_6-1-1-%E6%AF%94%E8%BC%83%E5%AF%BE%E8%B1%A1

def subtract_list(lst1, lst2):
    """Return a copy of ``lst1`` with one occurrence removed per item of ``lst2``.

    Items of ``lst2`` that are not (or no longer) present are ignored.
    ``lst1`` itself is left untouched.
    """
    remaining = lst1.copy()
    for item in lst2:
        try:
            remaining.remove(item)
        except ValueError:
            # Nothing left to remove for this item; move on to the next one.
            pass
    return remaining

if¤Ç¥Á¥§¥Ã¥¯¤¹¤ë¤è¤êÁᤤ

·¿¤Î¥Á¥§¥Ã¥¯

ÆÃ¤ËNaN¤¬¤¢¤ë¤È¡¢NaN¤Ïfloat·¿¤À¤È»×¤¤¹þ¤à¤Î¤Ç¡¢¸å¤Î½èÍý¤Ç¤Ö¤Ä¤«¤ë¤³¤È¤¬¤¢¤ë¡£

¤½¤Î»þ¤Ë¡¢·¿¥Á¥§¥Ã¥¯¤Çƨ¤²¤ë¤È¤¹¤ë¤È¡¢¡ÊpandasŪ¤Ë¤Ï¸ÄÊ̤η¿¥Á¥§¥Ã¥¯¤è¤ê¤Ïdropna¤Ê¤É¤ÎÊý¤¬¤¤¤¤¤È»×¤¦¤¬¡ËɬÍפˤʤ롣

Python¤Ç·¿¤ò¼èÆÀ¡¦È½Äꤹ¤ëtype´Ø¿ô, isinstance´Ø¿ô | note.nkmk.me

¤Ê¤Î¤À¤¬¡¢isinstance()¤ÎÊý¤¬¤è¤µ¤½¤¦¤À¡£type¤Ç°ú¤Ã¤«¤«¤Ê¤¤¾ì¹ç¤¬¤¢¤ë¡£

Îó¤Î·¿ÊÑ´¹

df['innings'].astype(np.int64)

¥ê¥¹¥È¤ÎǤ°Õ¤Î°ÌÃ֤ؤιàÌÜÁÞÆþ

list.insert(°ÌÃÖ, ÃÍ)

¥ê¥¹¥È¤òÃÖ¤­´¹¤¨¤ë¤Î¤ÇÃí°Õ

ÀäÂÐÃÍ python¤Îabs¤Èmath¤Îfabs¡¢numpy¤Îabs(absolute)¡¢fabs

python¤Îabs¤Ï¡¢À°¿ô¤ÎÀäÂÐÃͤÏÀ°¿ô¡¢¾®¿ô¤ÎÀäÂÐÃͤϾ®¿ô¡¢Ê£ÁÇ¿ô¤ÎÀäÂÐÃͤâ²Ä¡£

math¤Îfabs¤Ï¡¢À°¿ô¤ËÂФ·¤Æ¤â¾®¿ô¤ËÂФ·¤Æ¤â¡¢ÀäÂÐÃͤϾ®¿ô¤òÊÖ¤¹¡£Ê£ÁÇ¿ô¤ÏÉԲġ£

numpy¤Îabs(=absolute¤È½ñ¤¤¤Æ¤â¤¤¤¤¤é¤·¤¤¡Ë¤ÏÇÛÎóndarray¤ËÂФ·¤Æ¤âŬÍѤǤ­¤ë¡£ ¤â¤·À°¿ô¤È¾®¿ô¤¬º®¤¶¤Ã¤Æ¤¤¤ë¤È¡¢¾®¿ô¤Ë¤¹¤ë¡ÊºÇÂçÀºÅÙ¡Ë¡£

numpy¤Îfabs¤Ïmath¤Îfabs¤ÈƱÍͤˡ¢É¬¤º¾®¿ô¤òÊÖ¤¹¡£Ê£ÁÇ¿ô¤ÏÉԲġ£

pie chart¤ÇÀ¸¤Î¥Ç¡¼¥¿¤òÆþ¤ì¤ë

python - Pandas pie plot actual values for multiple graphs - Stack Overflow

¥Ò¥¹¥È¥°¥é¥à¡¦¥«¥¦¥ó¥È

df.hist()
(df['column']).hist()
(df['column']).value_counts()
(df['column']//10*10).value_counts()

Pandas¤Ç¥Ç¡¼¥¿¤ÎÃͤÎÉÑÅÙ¤ò·×»»¤¹¤ëvalue_counts´Ø¿ô¤Î»È¤¤Êý - DeepAge

¥«¥¦¥ó¥È·ë²Ì¤ÏSeries¤Ê¤Î¤Ç¡¢

s = pd.Series([3, 2, 7, 2, 3, 4])
u = s.value_counts(sort=False)
print(u)
# 2    2
# 3    2
# 4    1
# 7    1

print(type(u))
<class 'pandas.core.series.Series'>

print(u.index)
Int64Index([2, 3, 4, 7], dtype='int64')

# ¥«¥¦¥ó¥È·ë²Ìu¤òindex¤Ç¥½¡¼¥È¤¹¤ë¤È¡ÊÌۤäƤ¤¤ë¤ÈÃͤι߽ç¤Ç¥½¡¼¥È¡Ë
print(u.sort_index(ascending=False))
# 7    1
# 4    1
# 3    2
# 2    2

²ÊÌÜÊ̤ÎÀ®Àӥǡ¼¥¿¤òGroupBy¤Ç½¸·×À°Íý¤·¡¢ÅÀ¿ô¥Ò¥¹¥È¥°¥é¥à¤òÉÁ¤¯Îã

# df ¤¬ÆþÎϤβÊÌÜÊÌÀ®Àӥǡ¼¥¿ 
df = df[['³ØÀÒÈÖ¹æ', '³Ø²Ê', 'Æþ³ØÇ¯ÅÙ', '¼èÆÀÁÇÅÀ']]
dfmean = df.groupby('³ØÀÒÈÖ¹æ').mean()  # 1¿Í1¿Í¤Îºß³ØÃæ¤ÎɾÅÀÊ¿¶Ñ
dfgakka = dfmean.groupby(['³Ø²Ê', 'Æþ³ØÇ¯ÅÙ'])['¼èÆÀÁÇÅÀ'].apply(lambda d: (d//5*5).value_counts(bins=list(range(0,101,5))).sort_index()) 
# dfgakka¤Ï¡¢Series¤Ç¡¢index¤¬ ('³Ø²Ê', 'Æþ³ØÇ¯ÅÙ', 'ɾÅÀ¤Î5ÅÀ¹ï¤ß¤Î¥¯¥é¥¹')¤Î£³¥ì¥Ù¥ë
   
for year in [2010, 2011, 2012, 2013, 2014, 2015]:
    # DataFrame¤Ëľ¤¹
    for gakka in [51, 52, 53, 54, 55, 56]:    # ³Ø²Ê¤´¤È¤ËÎó¤Ë¤¹¤ë
        ser = dfgakka.xs(year, level='Æþ³ØÇ¯ÅÙ').xs(gakka, level='³Ø²Ê')
        if gakka==51:
            dfout = pd.DataFrame(ser)
            dfout.columns = [gakka]
        else:
            dfout[gakka] = ser
    dfout = dfout.rename(columns=gakkaname)
    dfout.index = [u.left for u in dfout.index.to_list()]
    #dfoutx.plot.bar(figsize=(8,6), rot=0)
    dfout.plot.line(figsize=(8,6), marker='x', rot=0, xticks=[u for u in dfout.index.to_list()])
    plt.title('ÄÌ»»É¾ÅÀÉÑÅÙʬÉÛ '+str(year)+' ǯÆþ³Ø')
    plt.xlabel('Ê¿¶ÑÄÌ»»É¾ÅÀ')
    plt.ylabel('¿Í¿ô')
    plt.legend(bbox_to_anchor=(1.01, 0.995), loc='upper left', borderaxespad=0)
    pdf.savefig(bbox_inches='tight')
    plt.show()

¤Þ¤¿¥Ò¥¹¥È¥°¥é¥à¤¬¿Í¿ô¤ÎÂå¤ï¤ê¤Ë%¤Ë¤¹¤ë¤Ë¤Ï

    dfm = dfout.sum()
    dfm = dfout/dfm*100
    dfm = dfm.rename(columns=gakkaname)
    #dfm.plot.bar(figsize=(8,6), rot=0)
    dfm.plot.line(figsize=(8,6), marker='x', rot=0, xticks=[u for u in dfm.index.to_list()])

ÆÃÄê¾ò·ï¤òËþ¤¿¤¹Í×ÁÇ¿ô¤ò¥«¥¦¥ó¥È

pandas¤ÇÆÃÄê¤Î¾ò·ï¤òËþ¤¿¤¹Í×ÁÇ¿ô¤ò¥«¥¦¥ó¥È¡ÊÁ´ÂΡ¢¹Ô¡¦Îó¤´¤È¡Ë | note.nkmk.me

¤¿¤È¤¨¤Ð

df['age']<25

¤ÏdfÃæ¤Î25̤Ëþ¤Î¥Ç¡¼¥¿¤ËÂФ·¤ÆTrue¤òÆþ¤ì¤¿Series¤òÊÖ¤¹¤«¤é

(df['age']<25).sum()

¤Ë¤è¤Ã¤Æ¸Ä¿ô¤ò¿ô¤¨¤ë¤³¤È¤¬¤Ç¤­¤ë¡£¡ÊTrue¤Ï1¡¢False¤Ï0¡Ë

ñ¤Ë¥Ç¡¼¥¿¤Î¸Ä¿ô¡ÊNaN¤Ç¤Ê¤¤¥Ç¡¼¥¿¤Î¸Ä¿ô¡Ë¤ò¿ô¤¨¤ë¤À¤±¤Ê¤é¡¢

df.count()   # count non-NaN values per column (vertically)
df.count(axis=1)   # count non-NaN values per row (horizontally)

¤¬»È¤¨¤ë¡£

matplotlib¤Çsavefig¤·¤¿¤È¤­¤Ë²¼¤¬ÀÚ¤ì¤ë¾ì¹ç

Matplotlib (with seaborn) ¤Ç½ÐÎϤ¹¤ëPDF¤Î²¼¤Î¤Û¤¦¤¬ÀÚ¤ì¤Æ¤·¤Þ¤¦¤È¤­¤Ï bbox_inches='tight' - ScalaÆüµ­

bbox_inches = "tight"¤È¤«¤½¤ì·Ï¤Î¤ä¤Ä - virsalus¤ÎÆüµ­

plt.savefig('sample.pdf', bbox_inches='tight')

¤È¤¹¤ë¤«

plt.tight_layout()

¤È¤¹¤ë¤«¤ÇÂбþ¡£

µ÷Î¥¹ÔÎódmat¤¬¤¢¤ë¤È¤­¡¢

ndmat =squareform(dmat)
lk = linkage(ndmat,method='average')
plt.figure(num=None, figsize=(22, 12), dpi=200, facecolor='w', edgecolor='k')
dendrogram(lk, labels=dmat.index, leaf_rotation=90)
plt.tight_layout()
# plt.savefig('corr_coeff_dendrogram.png', bbox_inches="tight")
plt.savefig('corr_coeff_dendrogram.png')

XML to Dict

Python: xmltodict ¤ò»È¤Ã¤Æ XML ¤ò¼­½ñ¤ØÊÑ´¹

biopython, SNP in feature

BioPython Tutorial

4.3.2.4 Location testing

You can use the Python keyword in with a SeqFeature or location object to see if the base/residue for a parent coordinate is within the feature/location or not.

For example, suppose you have a SNP of interest and you want to know which features this SNP is within, and let's suppose this SNP is at index 4350 (Python counting!). Here is a simple brute force solution where we just check all the features one by one in a loop:

>>> from Bio import SeqIO
>>> my_snp = 4350
>>> record = SeqIO.read("NC_005816.gb", "genbank")
>>> for feature in record.features:
...     if my_snp in feature:
...         print("%s %s" % (feature.type, feature.qualifiers.get("db_xref")))
...
source ['taxon:229193']
gene ['GeneID:2767712']
CDS ['GI:45478716', 'GeneID:2767712']

Note that gene and CDS features from GenBank or EMBL files defined with joins are the union of the exons – they do not cover any introns.

Matplotlib¤Ç¡¢Ê£¿ô¤Îfigure¤ò£±¤Ä¤ÎPDF¥Õ¥¡¥¤¥ë¤ËÊݸ¤¹¤ëÊýË¡

matplotlib¤ÇÊ£¿ô¤Îfigure¤ò°ì¤Ä¤Îpdf¤ËÊݸ¤¹¤ë - Qiita

import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages  # <-- ÄɲÃ

pdf = PdfPages('¥Õ¥¡¥¤¥ë̾.pdf')   # <-- Äɲá¡¡Êpdf¤ò¥ª¡¼¥×¥ó¡Ë

for ... :   # Ê£¿ôËçºî¤ë
    plt.scatter(....)¡¡   # plot¤·¤Æ¿Þ¤òºî¤ë
    pdf.savefig()   #  <-- savefig¤Î¹ÔÀè¤Ïpdf¤Ë¤Ê¤ë
    plt.show()    # <-- ²èÌÌɽ¼¨

pdf.close()   # <-- ºÇ¸å¤Ë˺¤ì¤Ê¤¤¤è¤¦¤Ë 

pandas plot.bar¤ÇX¼´ÌÜÀ¹¤Îʸ»ú¤¬½Ä¤ò¸þ¤«¤Ê¤¤·ï

DataFrame df ¤ò df.plot.bar() ¤Ç¥°¥é¥Õɽ¼¨¤¹¤ë¤È¤­¡¢X¼´ÌÜÀ¹¤Îʸ»ú¤¬ ²£¸þ¤­¤Ë¤Ê¤Ã¤Æ¤·¤Þ¤¦¡£

¤¤¤í¤¤¤í¤ä¤Ã¤Æ¤ï¤«¤Ã¤¿¤Î¤Ï¡¢

df.plot.bar(y='¥«¥é¥à̾')

¤È¤¹¤ë¤ÈÀµ¤·¤¯É½¼¨¤µ¤ì¤ë¤¬¡¢

df.plot.bar()

¤À¤È¥À¥á¡£y=¤ò½ñ¤«¤Ê¤¤¥¹¥¿¥¤¥ë¤Ï¡¢¤¹¤Ù¤Æ¤Î¥«¥é¥à¤¬É½¼¨¤µ¤ì¤ë¤Î¤Ç¶ñ¹ç¤¬¤¤¤¤¡£ df¤Ë£±¤Ä¤·¤«¥«¥é¥à¤¬¤Ê¤±¤ì¤Ð¤½¤ì¤ÇºÑ¤à¤·¡¢Ê£¿ô¤¢¤ì¤Ð¤½¤ì¤é¤¬¤ß¤Êɽ¼¨¤µ¤ì¤ë¡£ ¥é¥Ù¥ë¤Î¤³¤È¤µ¤¨µ¤¤Ë¤·¤Ê¤±¤ì¤Ð¤³¤ì¤Ç¤â½½Ê¬¤À¤í¤¦¡Ë

¤Þ¤¿¡¢xticks¤ò»È¤Ã¤Æ²óž¤µ¤»¤ë¥ï¥¶¤ÏƯ¤«¤Ê¤«¤Ã¤¿¡£¤¿¤È¤¨¤Ð

plt.xticks(rotation=90)

¤È¤«¤Ï¸ú¤«¤Ê¤«¤Ã¤¿¡£

Seaborn heatmap¤Ç¾å²¼Ã±¤¬ÀÚ¤ì¤ëÌäÂê

seaborn¤Îheatmap¤Çy¼´¤¬Â­¤ê¤Ê¤¯¤Æ¡¢annot¤¬³èÍѤǤ­¤Ê¤¤ - Qiita

¥Ð¡¼¥¸¥ç¥ó¥¢¥Ã¥×¤Ç²ò·è¡£¸Å¤¤¥Ð¡¼¥¸¥ç¥ó¤Î»þ¤Ïƨ¤²¤ëÊýË¡¤¬¤¢¤ë¡£ ax.set_ylim(len(flights), 0)

Seaborn heatmap¤Þ¤ï¤ê

¤È¤ê¤¢¤¨¤º¥Þ¥Ë¥å¥¢¥ë

seaborn.heatmap — seaborn 0.9.0 documentation

annotation

fmt¤Î»ØÄê¡§¡¡fmt='.2f'¤È¤«fmt='d'¤È¤«

¥«¥é¡¼¥Ñ¥ì¥Ã¥È

Seaborn¤Î¥«¥é¡¼¥Ñ¥ì¥Ã¥È¤ÎÁª¤ÓÊý - Qiita

Seaborn¤ÇÁê´Ø¤ò¥Ò¡¼¥È¥Þ¥Ã¥×¤Ë¤¹¤ë¡Ê¹Ô¡¦Îó¤òʤÓÂØ¤¨¤Ê¤¬¤é¡Ë / Heatmap using Seaborn (order rows and columns as you like) - Qiita

seaborn¤ÎºÙ¤«¤¤¸«¤¿ÌÜÄ´À°¤ò¤¢¤­¤é¤á¤Ê¤¤ - Qiita

annot_kws={'fontsize': 9, 'color': 'green'} ¤È¤«¤Ç¤­¤ë¡£

Ï¢´Ø·¸¿ô¤Î·×»»¤Ë»È¤¨¤ëCrosstab

crosstab() — researchpy 0.1.1 documentation

Python¤«¤éPDF¤¬½ñ¤­½Ð¤»¤ëreportlab

ReportLab - Content to PDF Solutions

¤Ç¡¢¤³¤ÎÊÕ¤¬»È¤¨¤½¤¦¡£

# reportlab platypus Table¤ò»È¤¦Îã­¡
# reportlab ¤Î Table ¤Îɽ¼¨°ÌÃÖ¤ò¥³¥ó¥È¥í¡¼¥ë¤¹¤ë
# https://qiita.com/ekzemplaro/items/09bd10b02ecbb35c0efa

from reportlab.lib.pagesizes import A4
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.units import mm
from reportlab.lib import colors
from reportlab.lib.styles import ParagraphStyle
from reportlab.pdfgen import canvas
from reportlab.platypus import Image, Paragraph, Table
from reportlab.pdfbase import pdfmetrics
#from reportlab.pdfbase.cidfonts import UnicodeCIDFont
from reportlab.pdfbase.ttfonts import TTFont
# ------------------------------------------------------------------
#fontname_g = "HeiseiKakuGo-W5"
#pdfmetrics.registerFont(UnicodeCIDFont(fontname_g))
pdfmetrics.registerFont(TTFont('IPAexGothic', 'ipaexg.ttf')) 

cc = canvas.Canvas('example.pdf', pagesize=A4)
width, height = A4

cc.setFont("IPAexGothic", 16)
str_out = "¤³¤ó¤Ë¤Á¤Ï"
cc.drawString(100, 730, str_out)

data = [["¥Æ¥¹¥È", 2, 3], ["ÆüËܸì", 1, 3], [3, 2, 10]]

table = Table(data, colWidths=20*mm)
table.setStyle([("VALIGN", (0,0), (-1,-1), "MIDDLE"),
                ("ALIGN", (0,0), (-1,-1), "CENTER"),
                ('INNERGRID', (0,0), (-1,-1), 0.25, colors.black),
        ('BOX', (0, 0), (-1, -1), 0.25, colors.black),
#        ('FONT', (0, 0), (-1, -1), "HeiseiKakuGo-W5", 16),
        ('FONT', (0, 0), (-1, -1), "IPAexGothic", 16),
        ])
#
table.wrapOn(cc, width, height)
#
table.drawOn(cc, 140*mm, 250*mm)
table.drawOn(cc, 75*mm, 225*mm)
table.drawOn(cc, 10*mm, 200*mm)
#
styles = getSampleStyleSheet()
my_style = styles["Normal"]
my_style.name = "bonlife"
#my_style.fontName = "HeiseiKakuGo-W5"
my_style.fontName = "IPAexGothic"
my_style.fontSize=16

ptext = "¤³¤ì¤Ï¥µ¥ó¥×¥ë¤Ç¤¹¡£"
pp = Paragraph(ptext, style=my_style)
pp.wrapOn(cc, 70*mm, 50*mm)  # size of 'textbox' for linebreaks etc.
pp.drawOn(cc, 50*mm, 190*mm)    # position of text / where to draw

cc.showPage()
cc.save()
print('complete')

¤È¡¢¤â¤Ã¤È´Ê°×ÈǤÇ

# reportlab platypus Table¤ò»È¤¦Îã­¢
# reportlab¤Îplatypus¤ò»È¤Ã¤Ætable¤òÉÁ²è¤¹¤ë¥µ¥ó¥×¥ë¡£wrapOn¤ò¸Æ¤Ó¤Ê¤µ¤¤¤È¤¤¤¦¤³¤È
#https://gist.github.com/bgnori/4452571
   
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
from reportlab.lib.units import mm

from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont

from reportlab.lib import colors
from reportlab.platypus import Table

#pdfmetrics.registerFont(TTFont('IPA Gothic',
#    '/usr/share/fonts/ipa-gothic/ipag.ttf'))
pdfmetrics.registerFont(TTFont('IPAexGothic', 'ipaexg.ttf'))

xmargin = 8.4*mm
ymargin = 8.8*mm
swidth = 48.3*mm
sheight = 25.4*mm

c = canvas.Canvas('example2.pdf', pagesize=A4)

#c.drawString(xmargin, ymargin, u"¤É¤ä¡¢pdf¤ä¤Ç¡£reportlab!")

t = Table([['a', 'b'], ['1', '2']])
#t.setStyle([('TEXTCOLOR', (0,0), (1,0), colors.red)])  # ¸µ¥µ¥ó¥×¥ë¤Ç¤Ï¤³¤¦¤·¤Æ¤¤¤ë
t.setStyle([('INNERGRID', (0,0), (-1,-1), 0.25, colors.black), # ·ÓÀþ¡¢¾å¤Î¥µ¥ó¥×¥ë¤«¤é¼ÚÍÑ
        ('BOX', (0, 0), (-1, -1), 0.25, colors.black),
        ('FONT', (0, 0), (-1, -1), "IPAexGothic", 16),
        ])

t.wrapOn(c, 100*mm, 100*mm)

t.drawOn(c, 100*mm, 100*mm)
c.showPage()
c.save()
print('complete')

matplotlib¤Ç¤ÎÆüËܸìɽ¼¨

!pip install japanize-matplotlib

¤·¤¿¤¢¤È¤Ç¡¢

import japanize_matplotlib

¤¹¤ë¤À¤±¤Çʸ»ú²½¤±¤ò½¤Àµ¤Ç¤­¤ë¡£

¡Á¡Á¡Á¡Á¡Á¡Á¡Á¡Á¡Á¡Á¡Á¡Á

¤ª¤Þ¤±¡¡jupyter notebook¤Î¥»¥ë¤ÎÉý¤ò¹­¤²¤¿¤¤¤È¤­

Jupyter Notebook¤Î¥»¥ëÉý¤ò¹­¤²¤¿¤¤¡ª - Qiita¡¡¤¬¤è¤µ¤½¤¦

pip install jupyterthemes

jt -cellw 95%

¤Þ¤¿¤Ï¡¢
IPython/Jupyter Notebook enlarge/change cell width · GitHub

custom.css ¤Ç

.container {
    width: 99% !important;
}   

div.cell.selected {
    border-left-width: 1px !important;	
}

div.output_scroll {
    resize: vertical !important;
}

¤Ç¡¢Jupyter Notebook¤ÎºÆµ¯Æ°¤¬É¬Íפ餷¤¤¡£

¤ª¤Þ¤±¡¡bash¤Î¥ë¡¼¥×

#!/usr/bin/bash
samples=("Anc" "1_2-1" "2_2-1" "2_5-1" "2_5-1-7A" "1_2-2" "2_2-2" "2_6-2" "2_6-2-10E")
for f in "${samples[@]}"
do
  #nohup python ProcessGD.py $f > $f.out &
  echo $f
done

¤ª¤Þ¤±¡¡linux¾å¤Îunzip¤Ç´Á»ú¥³¡¼¥É̾¤Îʸ»ú¥³¡¼¥É¤òWindows(SJIS)¤«¤éLinux(UTF-8)¤ØÊÑ´¹¤¹¤ë

unzip -O sjis foo.zip

¤³¤ì¤Ë¤è¤Ã¤Æ¡¢Æâ¢¤µ¤ì¤ë¥Õ¥¡¥¤¥ë¤Î̾Á°¤òSJIS¤«¤éUTF-8¤ËÊÑ´¹¤Ç¤­¤ë¡£


źÉÕ¥Õ¥¡¥¤¥ë: file³¬ÁØ¥«¥é¥à¤ÎÎã.png 752·ï [¾ÜºÙ] filetest_dendrogram.png 754·ï [¾ÜºÙ]

¥È¥Ã¥×   ÊÔ½¸ Åà·ë º¹Ê¬ ¥Ð¥Ã¥¯¥¢¥Ã¥× źÉÕ Ê£À½ ̾Á°Êѹ¹ ¥ê¥í¡¼¥É   ¿·µ¬ °ìÍ÷ ñ¸ì¸¡º÷ ºÇ½ª¹¹¿·   ¥Ø¥ë¥×   ºÇ½ª¹¹¿·¤ÎRSS
Last-modified: 2022-08-29 (·î) 15:35:43 (212d)