![]() |
¥Î¡¼¥È/Python¤«¤éJava¤ò¸Æ½Ð¤¹http://pepper.is.sci.toho-u.ac.jp/pepper/index.php?%A5%CE%A1%BC%A5%C8%2FPython%A4%AB%A4%E9Java%A4%F2%B8%C6%BD%D0%A4%B9 |
![]() |
¥Î¡¼¥È
ˬÌä¼Ô¿ô¡¡6002¡¡¡¡¡¡¡¡¡¡¡¡ºÇ½ª¹¹¿·¡¡2015-04-24 (¶â) 09:14:02
¼ÂºÝ¤ËStanford Parser¤ò¸Æ¤Ó½Ð¤·¤Æ¤ß¤ë¡£
¤Þ¤º¡¢Java¤Î¥³¡¼¥É¤Î¥Õ¥¡¥¤¥ëParserDemo.java¤òºî¤ë¡£
import java.util.*;
import edu.stanford.nlp.trees.*;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;

/**
 * Stand-alone Stanford Parser demo.
 *
 * Loads a serialized English PCFG grammar, parses one fixed, pre-tokenized
 * sentence and writes three things to standard output: the Penn-style tree,
 * the collapsed typed dependencies, and the same two views rendered through
 * {@link TreePrint}.
 */
class ParserDemo {

    /** Location of the serialized grammar handed to the parser constructor. */
    private static final String GRAMMAR_PATH =
            "/usr/local/stanford-parser-2008-10-26/englishPCFG.ser.gz";

    /** Option flags handed to the parser before parsing. */
    private static final String[] PARSER_FLAGS =
            {"-maxLength", "80", "-retainTmpSubcategories"};

    /** Output formats requested from {@link TreePrint}. */
    private static final String PRINT_FORMATS = "penn,typedDependenciesCollapsed";

    /**
     * Runs the demo.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        LexicalizedParser parser = new LexicalizedParser(GRAMMAR_PATH);
        parser.setOptionFlags(PARSER_FLAGS);

        // The sentence is supplied already split into tokens.
        String[] tokens = {"This", "is", "an", "easy", "sentence", "."};
        Tree tree = (Tree) parser.apply(Arrays.asList(tokens));

        // 1) Penn-style bracketed tree, followed by a blank line.
        tree.pennPrint();
        System.out.println();

        // 2) Collapsed typed dependencies derived from the tree.
        TreebankLanguagePack languagePack = new PennTreebankLanguagePack();
        GrammaticalStructureFactory structureFactory =
                languagePack.grammaticalStructureFactory();
        GrammaticalStructure structure = structureFactory.newGrammaticalStructure(tree);
        Collection dependencies = structure.typedDependenciesCollapsed();
        System.out.println(dependencies);
        System.out.println();

        // 3) Both views again, this time formatted by TreePrint.
        TreePrint printer = new TreePrint(PRINT_FORMATS);
        printer.printTree(tree);
    }
}
¤³¤ì¤ò¡¢¤¢¤é¤«¤¸¤ájavac¤Ç¥³¥ó¥Ñ¥¤¥ë¤·¤Æ¤ª¤¯¡£ParserDemo.class¤¬À¸À®¤µ¤ì¤ë¡£
¤³¤Î¥Ð¥¤¥È¥³¡¼¥É¥Õ¥¡¥¤¥ë¤ò¡¢Python/NLTK¤«¤é¸Æ¤Ó½Ð¤¹¤³¤È¤¬¤Ç¤¤ë¡£
python >>> import nltk >>> nltk.internals.java(['ParserDemo'],'/home/yamanouc/src/stanford:/usr/local/stanford-parser/stanford-parser.jar') Loading parser from serialized file /usr/local/stanford-parser-2008-10-26/englishPCFG.ser.gz ... done [3.0 sec]. (ROOT [35.686] (S [35.580] (NP [6.140] (DT [2.301] This)) (VP [28.298] (VBZ [0.144] is) (NP [22.961] (DT [3.221] an) (JJ [8.072] easy) (NN [8.897] sentence))) (. [0.002] .))) [nsubj(sentence-5, This-1), cop(sentence-5, is-2), det(sentence-5, an-3), amod(sentence-5, easy-4)] (ROOT (S (NP (DT This)) (VP (VBZ is) (NP (DT an) (JJ easy) (NN sentence))) (. .))) nsubj(sentence-5, This-1) cop(sentence-5, is-2) det(sentence-5, an-3) amod(sentence-5, easy-4) (None, None) >>>
⤷¡¢¤³¤Î»È¤¤Êý¤Ï¡¢Java¤Î¥×¥í¥°¥é¥à¤ò¥Ð¥Ã¥ÁŪ¤Ë»È¤¦¤À¤±¤·¤«¤Ç¤¤º¡¢Stdin¤ÈStdout¤Ç¥Ç¡¼¥¿¤Î¼õ¤±ÅϤ·¤ò¤¹¤ë¤³¤È¤¬¤Ç¤¤ë¤À¤±¤Ç¤¢¤ë¡£¤À¤«¤é¡¢¤Ç¤¤¿¥È¥ê¡¼¤Ï¡¢Ê¸»úÎó·Á¼°¤Ç½ÐÎϤ¹¤ë¤È¡¢¤½¤ì¤ò¤â¤¦°ìÅÙPythonÆâ¤Ç¥È¥ê¡¼¹½Â¤¤Ëºî¤êľ¤¹Êý¤¬»È¤¤°×¤¯¤Ê¤ë¤À¤í¤¦¡£¡Ê¤³¤ÎÉôʬ¤Ï¤³¤ì¤«¤é¸¡Æ¤¡Ë
¤Ç¤Ï¡¢Jython¡Êhttp://www.jython.org¡Ë¤ò¥¤¥ó¥¹¥È¡¼¥ë¤¹¤ë¡£Oregano¤Ç¤ÏFecore Core¤Ê¤Î¤Çyum¤Ç¥¤¥ó¥¹¥È¡¼¥ë¤Ç¤¤ë¡£
Jython¤ò¥¤¥ó¥¹¥È¡¼¥ë¤¹¤ë¤¿¤á¤Ë¤Ï¡¢Java¤¬É¬Íס£Java¤ÏJava¤Î¥µ¥¤¥È¤«¤éºÇ¿·¤Î¤â¤Î¡Ê1.6.0_12¡Ë¤ò¥¤¥ó¥¹¥È¡¼¥ë¡£
Jython¤ÇPython¤«¤éJava¥×¥í¥°¥é¥à¤ò¸Æ¤Ó½Ð¤¹¼ê½ç¤Ï¡ÊJython User Guide¤ÎAccessing Java from Jython¤Ë¤è¤ë¡Ë
C:\jython>jython Jython 2.0 on java1.2.1 Type "copyright", "credits" or "license" for more information. >>> from java.util import Random >>> r = Random() >>> r.nextInt() -790940041 >>> for i in range(5): ... print r.nextDouble() ... 0.23347681506123852 0.8526595592189546 0.3647833839988137 0.3384865260567278 0.5514469740469587
や
Jython 2.0 on java1.2.1 Type "copyright", "credits" or "license" for more information. >>> from java.util import * >>> Random <jclass java.util.Random at 31702169> >>> Hashtable <jclass java.util.Hashtable at 7538094>
¤Ê¤É¡£
¤Ç¤Ï¡¢Stanford Parser (/usr/local/stanford-parser/*) ¤ò¸Æ¤Ó½Ð¤¹¤³¤È¤ò¹Í¤¨¤ë¡£
¤Þ¤º¥Ù¡¼¥¹¤È¤·¤ÆJavaÈǤΥ¢¥¯¥»¥¹¥×¥í¥°¥é¥à¡Ê¥Ñ¥Ã¥±¡¼¥¸Ãæ/ParserDemo.java¡Ë¤ò¸«¤Æ¤ß¤è¤¦¡£
import java.util.*;
import edu.stanford.nlp.trees.*;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;

/**
 * Stanford Parser demo as shipped in the parser package.
 *
 * Loads the serialized English PCFG grammar from the working directory,
 * parses one fixed, pre-tokenized sentence and writes the Penn-style tree,
 * the collapsed typed dependencies, and the {@link TreePrint} rendering of
 * both to standard output.
 */
class ParserDemo {

    /** Grammar file name handed to the parser constructor (relative path). */
    private static final String GRAMMAR_PATH = "englishPCFG.ser.gz";

    /** Option flags handed to the parser before parsing. */
    private static final String[] PARSER_FLAGS =
            {"-maxLength", "80", "-retainTmpSubcategories"};

    /** Output formats requested from {@link TreePrint}. */
    private static final String PRINT_FORMATS = "penn,typedDependenciesCollapsed";

    /**
     * Runs the demo.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        LexicalizedParser parser = new LexicalizedParser(GRAMMAR_PATH);
        parser.setOptionFlags(PARSER_FLAGS);

        // The sentence is supplied already split into tokens.
        String[] tokens = {"This", "is", "an", "easy", "sentence", "."};
        Tree tree = (Tree) parser.apply(Arrays.asList(tokens));

        // 1) Penn-style bracketed tree, followed by a blank line.
        tree.pennPrint();
        System.out.println();

        // 2) Collapsed typed dependencies derived from the tree.
        TreebankLanguagePack languagePack = new PennTreebankLanguagePack();
        GrammaticalStructureFactory structureFactory =
                languagePack.grammaticalStructureFactory();
        GrammaticalStructure structure = structureFactory.newGrammaticalStructure(tree);
        Collection dependencies = structure.typedDependenciesCollapsed();
        System.out.println(dependencies);
        System.out.println();

        // 3) Both views again, this time formatted by TreePrint.
        TreePrint printer = new TreePrint(PRINT_FORMATS);
        printer.printTree(tree);
    }
}
¤³¤ì¤Ï¡¢Java¤È¤·¤Æ¥³¥ó¥Ñ¥¤¥ë¡Êjavac ParserDemo.java¡Ë¢Í¼Â¹Ô¡Êjava ParserDemo¡Ë¤Ç¤¤ë¤³¤È¤ò³Îǧ¤¹¤ë¡£
$ java ParserDemo Loading parser from serialized file englishPCFG.ser.gz ... done [3.1 sec]. (ROOT [35.686] (S [35.580] (NP [6.140] (DT [2.301] This)) (VP [28.298] (VBZ [0.144] is) (NP [22.961] (DT [3.221] an) (JJ [8.072] easy) (NN [8.897] sentence))) (. [0.002] .))) [nsubj(sentence-5, This-1), cop(sentence-5, is-2), det(sentence-5, an-3), amod(sentence-5, easy-4)] (ROOT (S (NP (DT This)) (VP (VBZ is) (NP (DT an) (JJ easy) (NN sentence))) (. .))) nsubj(sentence-5, This-1) cop(sentence-5, is-2) det(sentence-5, an-3) amod(sentence-5, easy-4)
¤Ç¤Ï¡¢¤³¤ì¤òPython¤«¤é¸Æ¤Ó½Ð¤¹¤è¤¦¤Ë½ñ¤´¹¤¨¤ë¡ÊParserDemo.py¡Ë¡£
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Jython port of the Stanford Parser ``ParserDemo.java`` example.

Must be run with Jython, because it imports Java classes directly from the
Stanford Parser jar.  It parses one fixed, pre-tokenized sentence and prints
the Penn-style tree, the collapsed typed dependencies, and the TreePrint
rendering of both, separated by dashed rules.
"""
import sys
import codecs
import os

# The jar has to be on sys.path before the Java packages below are imported.
sys.path.append('/usr/local/stanford-parser/stanford-parser.jar')

# Explicit names instead of ``import *``: only these classes are used.
from java.util import Arrays
from edu.stanford.nlp.trees import PennTreebankLanguagePack, TreePrint
from edu.stanford.nlp.parser.lexparser import LexicalizedParser

# Wrap stdout so that non-ASCII characters are written as UTF-8.
sys.stdout = codecs.getwriter('utf_8')(sys.stdout)

# Java: LexicalizedParser lp = new LexicalizedParser("englishPCFG.ser.gz")
lp = LexicalizedParser("/usr/local/stanford-parser/englishPCFG.ser.gz")
lp.setOptionFlags(["-maxLength", "80", "-retainTmpSubcategories"])

# ``sent`` was previously never assigned (every definition was commented
# out), so the parse call failed with NameError.  The list + Arrays.asList
# combination is the one the original notes record as working.
sent = ["This", "is", "an", "easy", "sentence", "."]

# Java: Tree parse = (Tree) lp.apply(Arrays.asList(sent))
parse = lp.apply(Arrays.asList(sent))
parse.pennPrint()
print("-" * 80)

# Java: TreebankLanguagePack tlp = new PennTreebankLanguagePack()
tlp = PennTreebankLanguagePack()
gsf = tlp.grammaticalStructureFactory()
gs = gsf.newGrammaticalStructure(parse)
tdl = gs.typedDependenciesCollapsed()
print(tdl)
print("-" * 80)

# Java: TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed")
tp = TreePrint("penn,typedDependenciesCollapsed")
tp.printTree(parse)
¼ç¤ÊÅÀ¤Ï¡¢
¤³¤Î¥×¥í¥°¥é¥à¤òjython¤Çµ¯Æ°¼Â¹Ô¤¹¤ë¡£
$jython ParserDemo.py Loading parser from serialized file /usr/local/stanford-parser/englishPCFG.ser.gz ... done [2.9 sec]. (ROOT [35.686] (S [35.580] (NP [6.140] (DT [2.301] This)) (VP [28.298] (VBZ [0.144] is) (NP [22.961] (DT [3.221] an) (JJ [8.072] easy) (NN [8.897] sentence))) (. [0.002] .))) -------------------------------------------------------------------------------- [nsubj(sentence-5, This-1), cop(sentence-5, is-2), det(sentence-5, an-3), amod(sentence-5, easy-4)] -------------------------------------------------------------------------------- (ROOT (S (NP (DT This)) (VP (VBZ is) (NP (DT an) (JJ easy) (NN sentence))) (. .))) nsubj(sentence-5, This-1) cop(sentence-5, is-2) det(sentence-5, an-3) amod(sentence-5, easy-4)
となった。
参照：
インストール
pip install javabridge
多少エラー警告が出たが終了。
使ってみる。ホームのドキュメントページ にある例。
import os
import javabridge

# Minimal javabridge round trip: start a JVM, evaluate one script with a
# bound variable, print the result, and always shut the JVM down again.
javabridge.start_vm(run_headless=True)
try:
    # ``greetee`` is passed in through the bindings dict and referenced by
    # name inside the script text.
    greeting = javabridge.run_script(
        'java.lang.String.format("Hello, %s!", greetee);',
        dict(greetee='world'))
    # Parenthesised form parses on Python 3 as well; with a single argument
    # it prints the same text under Python 2.
    print(greeting)
finally:
    # Runs even if run_script raises, so the JVM is not left running.
    javabridge.kill_vm()
¼Â¹Ô¤Ç¤¤¿¡£
Ʊ¥Ú¡¼¥¸¤Ë¤¢¤ë¾¤Î¥µ¥ó¥×¥ë¤Î¤¿¤á¤Ë¡¢wxPython¤ò¥¤¥ó¥¹¥È¡¼¥ë¡£
yum install wxPython-devel
いくつか依存ファイルもインストールされた。
インストール: wxPython-devel.x86_64 0:2.8.12.0-1.el6 依存性関連をインストールしました: bakefile.x86_64 0:0.2.8-3.el6.centos python-empy.noarch 0:3.3-5.el6.centos wxBase.x86_64 0:2.8.12-1.el6.centos wxGTK.x86_64 0:2.8.12-1.el6.centos wxGTK-devel.x86_64 0:2.8.12-1.el6.centos wxGTK-gl.x86_64 0:2.8.12-1.el6.centos wxGTK-media.x86_64 0:2.8.12-1.el6.centos wxPython.x86_64 0:2.8.12.0-1.el6