[[Python¥Ð¥¤¥ª]]¡¡[[Python¥Ð¥¤¥ª/¥Ä¡¼¥ë]]~ &counter();¡¡¡¡¡¡&lastmod();~ *EdgeR¤Þ¤ï¤ê [#p0dc7d75] [[Python¥Ð¥¤¥ª/¥Ä¡¼¥ë/RNAseq¤-X EdgeR¤Þ¤ï¤ê]]¤È¤Û¤ÜƱ¤¸¤³¤È¤ò¡¢Python¤«¤éR¤ØÊü¤Ã¤Æ¹Ô¤¦¡£ **¤Þ¤º¤Ï¡¢EdgeR¤ÇÆó·²´ÖÈæ³Ó [#z436b5c9] »²¾È¡¡[[Æó·²´ÖÈæ³Ó¡ÊedgeR¡Ë | RNA-Seq ¤Ë¤è¤ëÈæ³Ó¥È¥é¥ó¥¹¥¯¥ê¥×¥È¡¼¥à²òÀÏ:https://bi.biopapyrus.jp/rnaseq/analysis/de-analysis/2g-edger.html]] ¹¹¤Ë¸µ¤Ï¡¢[[edgeRUsersGuide.pdf:https://www.bioconductor.org/packages/release/bioc/vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf]] import pandas as pd import rpy2 from rpy2.robjects.packages import importr print(rpy2.__version__) from rpy2.robjects import pandas2ri pandas2ri.activate() from rpy2.robjects import r, StrVector, FactorVector base = importr('base') base.source("http://www.bioconductor.org/biocLite.R") biocinstaller = importr("BiocInstaller") # ºÇ½é¤À¤± biocinstaller.biocLite("edgeR") # ºÇ½é¤À¤± edgeR = importr("edgeR") count = pd.read_csv('../all.counts.txt', sep='\t', skiprows=1) # ½é¤á¤ÏGeneid¤òÉÕ¤±¤Æ¤ª¤¯ count = count[['Geneid', 'SRR453566.sorted.bam','SRR453567.sorted.bam','SRR453568.sorted.bam','SRR453569.sorted.bam','SRR453570.sorted.bam', 'SRR453571.sorted.bam']] count.columns = ['Geneid', "batch_1","batch_2","batch_3", "chemostat_1", "chemostat_2", "chemostat_3"] r.assign("count", pandas2ri.py2ri(count)) # count¤òR¤Ø¥¢¥Ã¥×¤¹¤ë r('genelist <- count[,c("Geneid")]') # R¾å¤ÇGeneid¤ò³Ð¤¨¤Æ¤ª¤¯ r('count <- count[, colnames(count) != "Geneid"]') # R¾å¤ÇGeneidÎó¤ò¾Ã¤¹ group = FactorVector(StrVector(['b', 'b', 'b', 'c', 'c', 'c'])) # group¤òºî¤Ã¤Æ¤ª¤¯ r.assign("group", group) r('d <- DGEList(counts = count, group = group, genes = genelist)') # count¤Ègroup¤«¤éDEGlist¤òºî¤ë¡¢genes¤ËGeneid(¡Ágenelist)¤òÆþ¤ì¤ë r('d <- calcNormFactors(d)') # ½êÄê¤Î¼ê³¤£± r('d <- estimateCommonDisp(d)') # £² r('d <- estimateTagwiseDisp(d)') # £³ r('result <- exactTest(d)') r('result0 <- result$table') # R¦¤Çtable¤À¤±¤Ë¤¹¤ë¡£Á´Éô»ý¤Ã¤Æ¤¯¤ë¤È¼«Æ°ÊÑ´¹»þ¤Ë pandas¥¯¥é¥¹¤Ç¤Ï¤Ê¤¯rpy2¥¯¥é¥¹¤Ë¤·¤«¤Ê¤é¤Ê¤¤ r('result0ix <- result$genes') # result$table ¤Ë¤ÏGeneid¤¬Æþ¤Ã¤Æ¤¤¤Ê¤¤ r0 = r['result0'] print('r0\n', r0.head()) r0ix = r['result0ix'] r0.index = r0ix['genes'] # r0¤Ëindex¤È¤·¤ÆGeneid¤òÉÕ¤±¤ë print('result0\n', r0.head()) print('pvalues\n', (r0.sort_values(['PValue', 'logFC'], ascending=[True, False])).head(20)) ## sort_values¤ÇÊ£¿ô¥¡¼¤ò»ØÄê¤Ç¤¤ë¡£¹¹¤Ëascending¾ò·ï¤¬°ã¤Ã¤Æ¤â¤è¤¤ r('r2 <- topTags(result, n=nrow(count))') # R¾å¤ÇtopTags¤òÄ̤¹¡£ r('r22 <- r2$table') # ·ë²Ì¤ÎtableÉôʬ¤À¤±¤òÃê½Ð¤¹¤ë r2 = r['r22'] # Python¦¤Ø»ý¤Ã¤Æ¤¯¤ë print('r2\n', r2.head()) ################################ # ´°Á´¤ËºÇ½é¤«¤é count = pd.read_csv('../all.counts.txt', sep='\t', skiprows=1) count = count[['Geneid', 'SRR453566.sorted.bam','SRR453567.sorted.bam','SRR453568.sorted.bam','SRR453569.sorted.bam','SRR453570.sorted.bam', 'SRR453571.sorted.bam']] count.columns = ['Geneid', "batch_1","batch_2","batch_3", "chemostat_1", "chemostat_2", "chemostat_3"] r.assign("count", pandas2ri.py2ri(count)) r('genelist <- count[,c("Geneid")]') r('count <- count[, colnames(count) != "Geneid"]') group = FactorVector(StrVector(['b', 'b', 'b', 'c', 'c', 'c'])) r.assign("group", group) r('d <- DGEList(counts = count, group = group, genes=genelist)') r('row.names(d$count) <- genelist') r('row.names(d$counts) <- genelist') r('design <- model.matrix(~ group)') r('d <- estimateGLMCommonDisp(d, design)') r('d <- estimateGLMTrendedDisp(d, design)') r('d <- estimateGLMTagwiseDisp(d, design)') r('fit <- glmFit(d, design)') r('lrt <- glmLRT(fit, coef = 2)') # LRT¤ò½ÐÎÏ r('lrt0 <- topTags(lrt, n = nrow(count))') # topTag¤òÄ̤¹¡£ # print('lrt0\n', r['lrt0']) r('lrtgenes <- lrt0$table[, "genes"]') lrtgenes = r['lrtgenes'] r('lrttable <- lrt0$table') # tableÉôʬ¤À¤±¤òÃê½Ð¤¹¤ë lrttable = r['lrttable'] # Python¤Î¦¤Ø»ý¤Ã¤Æ¤¯¤ë print(type(lrttable)) print('lrttable\n', lrttable.head(20)) lrttable.to_csv("lrttable.csv", sep='\t') #decideTests¤ÇGeneid¤¬½ÐÎϤËÅϤµ¤ì¤Ê¤¤¡£ r('dt <- decideTests(lrt)') dt = pd.DataFrame(r['dt']) dt.index = lrtgenes print('dt\n', dt) -[[ExactTests:https://www.rdocumentation.org/packages/edgeR/versions/3.14.0/topics/exactTest]] -[[topTags function | R Documentation:https://www.rdocumentation.org/packages/edgeR/versions/3.14.0/topics/topTags]] -[[DGELRT-class function | R Documentation:https://www.rdocumentation.org/packages/edgeR/versions/3.10.5/topics/DGELRT-class]] -[[DGEList function | R Documentation:https://www.rdocumentation.org/packages/edgeR/versions/3.14.0/topics/DGEList]] -[[glmFit function | R Documentation:https://www.rdocumentation.org/packages/edgeR/versions/3.14.0/topics/glmFit]] -[[decideTests function | R Documentation: https://www.rdocumentation.org/packages/limma/versions/3.28.14/topics/decideTests]] -[[decideTests function | R Documentation:https://www.rdocumentation.org/packages/limma/versions/3.28.14/topics/decideTests]] -[[TestResults-class function | R Documentation:https://www.rdocumentation.org/packages/limma/versions/3.28.14/topics/TestResults-class]] -[[dimnames function | R Documantation:https://www.rdocumentation.org/packages/base/versions/3.6.1/topics/dimnames]] -[[Rpy2¤Î»È¤¤Êý¤ÎÈ÷˺Ͽ - Qiita:https://qiita.com/MTNakata/items/04a045f4beb0fa9e3a4d]] -[[rpy2 / R interface — pandas 0.22.0 documentation:https://pandas.pydata.org/pandas-docs/version/0.22/r_interface.html]] -[[Python ¤È R ¤Î°ã¤¤ (¥Ç¡¼¥¿¥Õ¥ì¡¼¥àÊÔ) – Python ¤Ç¥Ç¡¼¥¿¥µ¥¤¥¨¥ó¥¹:https://pythondatascience.plavox.info/python%E3%81%A8r%E3%81%AE%E9%81%95%E3%81%84/%E3%83%87%E3%83%BC%E3%82%BF%E3%83%95%E3%83%AC%E3%83%BC%E3%83%A0]] -[[R¤Î¥Ç¡¼¥¿¥Õ¥ì¡¼¥à¤òPython¤Îpandas¥Ç¡¼¥¿¥Õ¥ì¡¼¥à¤Ë¤¹¤ëÊýË¡ - Qiita:https://qiita.com/tatekaiten/items/e29ac74660b249fdd42f]] -[[Converting DataFrames to and from R - python:http://www.jamesloach.com/python/pandas2ri.html]] -[[python¤«¤éÅý·×²òÀÏ¥½¥Õ¥ÈR¤òÍøÍѤ¹¤ëÊýË¡¡¦Â裱²ó:http://cup.sakura.ne.jp/pyrcmd/pyrcmd01.htm]] -[[python¤«¤éÅý·×²òÀÏ¥½¥Õ¥ÈR¤òÍøÍѤ¹¤ëÊýË¡¡¦Â裲²ó:http://cup.sakura.ne.jp/pyrcmd/pyrcmd02.htm]] -[[Python rpy2 ¤Ç pandas ¤Î DataFrame ¤ò R ¤Î data.frame ¤ËÊÑ´¹¤¹¤ë - StatsFragments:http://sinhrks.hatenablog.com/entry/2014/10/16/224948]] -[[R-Source ¥Ç¡¼¥¿·¿¤È¥Ç¡¼¥¿¹½Â¤:http://cse.naro.affrc.go.jp/takezawa/r-tips/r/25.html]] -[[R_Îó̾¤ÇÎó¤òºï½ü¤¹¤ë - R¤ÈLinux¤È...:http://rmecab.jp/wiki/index.php?R_%CE%F3%CC%BE%A4%C7%CE%F3%A4%F2%BA%EF%BD%FC%A4%B9%A4%EB]] -[[R: Get and Set Row Names for Data Frames:https://stat.ethz.ch/R-manual/R-devel/library/base/html/row.names.html]] -[[Îó̾¤ò»ØÄꤷ¤Æ¥Ç¡¼¥¿¤òÃê½Ð¤¹¤ë¡ÚR¡Û - QIITA:https://qiita.com/hitsujisuke/items/d71ee40daa0786ae1680]] -[[Pandas¤ÇÎó¥Ç¡¼¥¿¤ò¥½¡¼¥È¤¹¤ësort_values´Ø¿ô¤Î»È¤¤Êý - DeepAge:https://deepage.net/features/pandas-sort-values.html]] -[[pÃͤÈͰտå½à | ¥Ö¥í¥° | Åý·×WEB:https://bellcurve.jp/statistics/blog/14004.html]] -[[Í°Õ - Wikipedia:https://ja.wikipedia.org/wiki/%E6%9C%89%E6%84%8F]] -[[ÌàÅÙÈæ¸¡Äê - Wikipedia:https://ja.wikipedia.org/wiki/%E5%B0%A4%E5%BA%A6%E6%AF%94%E6%A4%9C%E5%AE%9A]] -[[Àµµ¬Ê콸ÃÄ¡Êʬ»¶´ûÃΡˤÎÊ¿¶Ñ¤Ë´Ø¤¹¤ë¸¡Äê | ͰդË̵°ÕÌ£¤ÊÏÃ:http://starpentagon.net/analytics/z_test_via_lrt/]]