Python¥Ð¥¤¥ª?¡¡Python¥Ð¥¤¥ª/¥Ä¡¼¥ë?
2825¡¡¡¡¡¡2019-07-12 (¶â) 12:24:39

EdgeR¤Þ¤ï¤ê

¤Þ¤º¤Ï¡¢EdgeR¤ÇÆó·²´ÖÈæ³Ó

»²¾È¡¡Æó·²´ÖÈæ³Ó¡ÊedgeR¡Ë | RNA-Seq ¤Ë¤è¤ëÈæ³Ó¥È¥é¥ó¥¹¥¯¥ê¥×¥È¡¼¥à²òÀÏ

¹¹¤Ë¸µ¤Ï¡¢edgeRUsersGuide.pdf

¤³¤Î¥Ú¡¼¥¸¤Ë¤è¤ë¤È¡¢

edgeR ¤Î¥Ð¡¼¥¸¥ç¥ó¤¬°Û¤Ê¤ë¤È¡¢²òÀÏ·ë²Ì¤â¾¯¤·°Û¤Ê¤Ã¤Æ¤¯¤ë¡£¤³¤Î¥Ú¡¼¥¸¤Ç¤Ï 3.10.0 ¤òÍøÍѤ·¤Æ¤ë¡£

¤È¤¤¤¦¤³¤È¤À¤¬¡¢¤É¤¦¤À¤í¤¦¤«¡©

¤³¤Î¥Ú¡¼¥¸¤Ë½¾¤Ã¤Æ¡¢¼«Á°¥Ç¡¼¥¿¤Ç»î¤¹¡£

> source("http://bioconductor.org/biocLite.R")
> biocLite("edgeR")
# °Í¸¥é¥¤¥Ö¥é¥ê¤¬Â¿¤¤¤Î¤Ç¡¢º£¤Þ¤Ç»È¤Ã¤Æ¤¤¤Ê¤¤¤È·ë¹½»þ´Ö¤¬¤«¤«¤ë
> library(edgeR)
> count <- read.table("../all.counts.txt", sep="\t", header = T, row.names = 1)
> head(count)
                  Chr Start   End Strand Length SRR453566.sorted.bam
gene_0001 NC_001133.9  1807  2169      -    363                    0
gene_0002 NC_001133.9  2480  2707      +    228                    0
gene_0003 NC_001133.9  7235  9016      -   1782                    0
> count <- count[,c('SRR453566.sorted.bam','SRR453567.sorted.bam','SRR453568.sorted.bam','SRR453569.sorted.bam','SRR453570.sorted.bam', 'SRR453571.sorted.bam')]
> colnames(count) <- c("batch_1","batch_2","batch_3", "chemostat_1", "chemostat_2", "chemostat_3")
> head(count)
          batch_1 batch_2 batch_3 chemostat_1 chemostat_2 chemostat_3
gene_0001       0       2       6           0           0           1
gene_0002       0       0       0           0           0           0
gene_0003       0       0       0           0           0           0
> count <- as.matrix(count)
> dim(count)   # 6420   6
> group <- factor(c("b", "b", "b", "c", "c", "c"))

¡ãexact test¡ä

# ¤Þ¤º¡¢count¥Ç¡¼¥¿¤Ègroup¤ò£±¤Ä¤ÎÊÑ¿ô¤ËÆþ¤ì¤ë¡Ê¥é¥Ù¥ëÉÕ¤±¡Ë
> d <- DGEList(counts = count, group = group)
# ȯ¸½¿ô¤ÎÈó¾ï¤Ë¾¯¤Ê¤¤gene¤ò½üµî¡¡¡¡[[edgeRUsersGuide:https://www.bioconductor.org/packages/release/bioc/vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf]] p55
> keep <- rowSums(cpm(d)>1) >= 2    # CPM(count per million)¤¬£±°Ê¾å¤¬£²¤Ä°Ê¾å
> d <- d[keep, , keep.lib.sizes=FALSE]
> dim(d)   # 5899   6
# Àµµ¬²½
> d <- calcNormFactors(d)
# ʬ»¶¤Î¿ä¬
> d <- estimateCommonDisp(d)
> d <- estimateTagwiseDisp(d)

# ¤Ç¡¢exact test¤Î¸¡Äê
> result <- exactTest(d)
> dim(result)    # 5899    3

# ·ë²Ìɽ¼¨¤Î¤¿¤á¤Ë¤Ï
> topTags(result)
Comparison of groups:  c-b 
              logFC    logCPM PValue FDR
gene_2989 11.273497  8.543906      0   0
gene_4667  9.683678  9.715407      0   0
gene_3497  8.659467 11.419660      0   0
gene_3421  8.501696  7.911698      0   0
gene_4882  7.545279 10.715972      0   0
gene_0615  7.399665  8.409708      0   0
gene_1031  7.233161  7.473834      0   0
gene_5986  7.169033  6.784983      0   0
gene_2035  6.893378  8.849378      0   0
gene_0017  6.841963 10.445186      0   0
# logFC¤È¤Ï¡§log fold-change¡¢wild type (WT) ¤È knock out (KO) ¤Î¥·¥°¥Ê¥ëÃͤòÈæ³Ó¤¹¤ë¤È¤­¡¢log2(KO)/log2(WT)
# locCPM¤È¤Ï¡§log counts per million
# FDR:  False Discovery Rate

# ·ë²Ì¤ò¥Õ¥¡¥¤¥ë¤Ë³ÊǼ¤¹¤ë
> table <- as.data.frame(topTags(result, n = nrow(count)))
> write.table(table, file = "result.txt", col.names = T, row.names = T, sep = "\t")
# ¥Õ¥¡¥¤¥ë¤Î¡¢ÅÓÃæ¤Î¥¨¥ó¥È¥ê¡¼¤ÎÎã
        "logFC"     "logCPM"        "PValue"        "FDR"
"gene_2901"     0.167058457314462       6.8615588901153 0.0493154259214582      0.0667381733220193
"gene_0340"     0.289892217524922       5.33859450515354        0.0494251531593589      0.0668713253410684
"gene_0519"     0.189496458586853       6.08722498147699        0.0494834466115673      0.0669348432840255
"gene_0954"     0.256044030124387       5.65989391337004        0.0496916298717389      0.0672010372795479
"gene_3371"     -0.205143944867366      9.08724668881671        0.0498370683360759      0.0673822750663561
"gene_0287"     -0.255406659792859      5.95521336812612        0.0501243894523602      0.0677552184645904
"gene_5879"     0.208805500563976       5.93528971411533        0.0503683786485931      0.0680694308472052
"gene_5595"     -0.172878902469866      6.9102371649123 0.0503961888612053      0.0680914150463238
"gene_2980"     -0.174306726898056      6.36273018338751        0.0505773479860026      0.0683201796940756
"gene_3461"     0.190387033748589       6.37468042292314        0.0505886667068524      0.0683201796940756

exactTest From edgeR v3.14.0

DGEList

# ·ë²Ì¤ò¿Þ¼¨¤¹¤ë
> is.DEG <- as.logical(table$FDR < 0.01)
> DEG.names <- rownames(table)[is.DEG]
> plotSmear(result, de.tags = DEG.names)

PlotSmear.png ¤«¤Ê¤ê¤Ï¤ß½Ð¤·¤Æ¤¤¤ë¡©

¡ãÌàÅÙÈæ¸¡Äê¡ä

> design <- model.matrix(~ group)
> design
  (Intercept) groupc
1           1      0
2           1      0
3           1      0
4           1      1
5           1      1
6           1      1
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"

# ²òÀϤ¹¤ë
# ¤Þ¤º¤â¤¦°ìÅ٥ǡ¼¥¿¤òcount¤«¤éºî¤êľ¤·
> d <- DGEList(counts = count, group = group)
# Á°¤ÈƱÍͤˡ¢È¯¸½¿ô¤ÎÈó¾ï¤Ë¾¯¤Ê¤¤gene¤ò½üµî
> keep <- rowSums(cpm(d)>1) >= 2    # CPM(count per million)¤¬£±°Ê¾å¤¬£²¤Ä°Ê¾å
> d <- d[keep, , keep.lib.sizes=FALSE]
# ¤³¤³¤«¤é¡¢Ê¬ÉÛ¡Êʬ»¶¡Ë¤Î¿äÄê¡¡GLM¤ò»È¤¦
> d <- calcNormFactors(d)
> d <- estimateGLMCommonDisp(d, design)
> d <- estimateGLMTrendedDisp(d, design)
> d <- estimateGLMTagwiseDisp(d, design)
# ʬÉۤ˥ե£¥Ã¥È
> fit <- glmFit(d, design)
# ÌàÅÙÈæ¸¡Äê
¤³¤Î¤È¤­¡¢full model ¤È reduced model ¤òÈæ¤Ù¤ë¤Î¤Ç¡¢¤³¤Î¾ðÊó¤ò´Ø¿ô¤ËÍ¿¤¨¤ë¡£
glmFit ¤òÍøÍѤ·¤ÆºîÀ®¤·¤¿ fit ¤Ë¤Ï full model ¤Î¥Ç¡¼¥¿´Þ¤Þ¤ì¤Æ¤¤¤ë¡£½¾¤Ã¤Æ¡¢¤¢¤È¤Ï
reduced model ¤Î¾ðÊó¤À¤±¤òÍ¿¤¨¤ì¤Ð¤è¤¤¡£¤³¤³¤Ç¡¢full model ¤È reduced model ¤Î
¥Ç¥¶¥¤¥ó¹ÔÎó¤ò¸«Èæ¤Ù¤Æ¤ß¤ë¤È¡¢reduced model ¤Ï full model ¤Î¥Ç¥¶¥¤¥ó¹ÔÎó¤Î 2 Îó¤ò
¤¹¤Ù¤Æ 0 ¤Ë¤·¤¿¤â¤Î¤ËÅù¤·¤¤¤³¤È¤¬¤ï¤«¤ë¡£¤½¤³¤Ç¡¢coef = 2 ¤È»ØÄꤷ¤Æ¡¢reduced model
¤Î¾ðÊó¤òÍ¿¤¨¤ë¡£
> lrt <- glmLRT(fit, coef = 2)
> dim(lrt)  # 5899   4
# ·ë²Ì¤Ï
> topTags(lrt)
Coefficient:  groupc 
              logFC    logCPM       LR PValue FDR
gene_2989 11.272397  8.543271 2260.090      0   0
gene_3497  8.658186 11.419157 1934.825      0   0
gene_3421  8.500384  7.911092 2271.734      0   03859
gene_4882  7.543818 10.715354 2673.692      0   0
gene_0615  7.398431  8.409080 1897.589      0   0

# ¥Õ¥¡¥¤¥ë¤Ë½ÐÎÏ
> lrttable <- as.data.frame(topTags(lrt, n = nrow(count)))
> write.table(lrttable, file="lrtresult.txt", col.names=T, row.names=T, sep="\t")
               logFC           logCPM       LR        PValue              FDR
"gene_3295"     0.247645680731094       5.40510860572214        3.87945550718096        0.0488804073717157      0.065772245229414
"gene_4943"     -0.194295943127542      7.18848127443337        3.8631344790163 0.0493580361534112      0.0663997845539276
"gene_3461"     0.190360832039291       6.37466110822439        3.85464691865404        0.0496083672460962      0.0667213311410673
"gene_6181"     -0.204622554470669      5.77230370218388        3.85415389282258        0.0496229496429794      0.0667257305548064
"gene_1983"     0.856380812164874       1.74233409044916        3.8422391885909 0.0499767355704516      0.0671861356267307
"gene_5577"     -0.263126831966537      5.60034347444897        3.84011138664067        0.050040196820857       0.0672561223618673
"gene_3658"     -0.194852891673291      6.72394023500093        3.82819449899496        0.0503971930016531      0.0677205105960711
"gene_2310"     -0.197601718589681      6.24609836497735        3.82407278015195        0.0505212936731495      0.0678718085579387
"gene_2980"     -0.17430474127026       6.36273796901708        3.81688233743943        0.0507385643479778      0.0681481764773954
"gene_0447"     0.325651368079434       4.40033732555139        3.81349708835367        0.0508411968215156      0.0682704803209925

Áý¸º¤ÎÁ´ÂΤò¸«¤ë¤È¡¡¡ÊEdgeUsersGuide p58¡Ë

>summary(decideTests(lrt))    # 1% FDR
       groupc
Down     2072
NotSig   1620
Up       2207

MD¿Þ¤òÉÁ¤¯¤³¤È¤¬¤Ç¤­¤ë¡¡¡¡¡ÊEdgeUsersGuide p58¡Ë

> plotMD(lrt)
> abline(h=c(-1,1), col="blue")

MDchart.png

glmFit/glmLRT

¡ãGO analysis¡ä

> biocLite("GO.db")
> biocLite("org.Hs.eg.db")
> go <- goana(lrt)
 goana.default(de = DEGenes, universe = universe, ...) ¤Ç¥¨¥é¡¼: 
  No annotated genes found in universe

źÉÕ¥Õ¥¡¥¤¥ë: fileMDchart.png 758·ï [¾ÜºÙ] filePlotSmear.png 658·ï [¾ÜºÙ]

¥È¥Ã¥×   ÊÔ½¸ Åà·ë º¹Ê¬ ¥Ð¥Ã¥¯¥¢¥Ã¥× źÉÕ Ê£À½ ̾Á°Êѹ¹ ¥ê¥í¡¼¥É   ¿·µ¬ °ìÍ÷ ñ¸ì¸¡º÷ ºÇ½ª¹¹¿·   ¥Ø¥ë¥×   ºÇ½ª¹¹¿·¤ÎRSS
Last-modified: 2019-07-12 (¶â) 12:24:39 (1356d)