JWI 是 MIT 開發用來操作 WordNet 的介面,它的官網如下:
http://projects.csail.mit.edu/jwi/
使用這個 API 之前,請先安裝 WordNet,這樣才可以寫程式去使用這個 Interface。
WordNet 官網: https://wordnet.princeton.edu/
以下記錄著使用 WordNet 搭配這個 JWI 來抓取同義字的 Java 程式。
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package wordnetdictionaryjwi; | |
import edu.mit.jwi.Dictionary; | |
import edu.mit.jwi.IDictionary; | |
import edu.mit.jwi.item.IIndexWord; | |
import edu.mit.jwi.item.ISynset; | |
import edu.mit.jwi.item.IWord; | |
import edu.mit.jwi.item.IWordID; | |
import edu.mit.jwi.item.POS; | |
import java.io.BufferedReader; | |
import java.io.File; | |
import java.io.FileReader; | |
import java.io.IOException; | |
import java.net.MalformedURLException; | |
import java.net.URL; | |
import java.util.ArrayList; | |
public class WordNetDictionaryJWI { | |
public static void main(String[] args) throws MalformedURLException, IOException { | |
// construct the URL to the Wordnet dictionary directory | |
//不需要使用下列兩行,因為會讓讀取路徑出錯 | |
//String wnhome = System . getenv ("C:\\Program Files (x86)\\WordNet\\2.1"); | |
//String path = wnhome + File. separator + "dict\\"; | |
ArrayList synonymList = new ArrayList(); | |
URL url = new URL("file", null,"C:\\Program Files (x86)\\WordNet\\2.1\\dict\\"); | |
//Construct the dictionary object and open it | |
IDictionary dict = new Dictionary(url); | |
dict.open(); | |
//Look up first sense of the word "dog" | |
//將預設的字丟進synonymList中 | |
FileReader fr = new FileReader("seed.txt"); | |
BufferedReader br = new BufferedReader(fr); | |
while(br.ready()){ | |
synonymList.add(br.readLine()); | |
} | |
System.out.println("ㄧ開始seed的字 : "+synonymList); | |
ArrayList tempList = new ArrayList(); | |
//System.out.println(tempList.size()); | |
ArrayList pos = new ArrayList(); | |
pos.add(POS.NOUN); | |
pos.add(POS.ADJECTIVE); | |
pos.add(POS.ADVERB); | |
pos.add(POS.VERB); | |
//掃幾層 | |
for(int k=0;k<2;k++){ | |
//每一個字取同義字 | |
for(int r=0;r<synonymList.size();r++){ | |
//對於每一種詞性 | |
for(int i=0;i<pos.size();i++){ | |
IIndexWord idxWord = dict.getIndexWord(synonymList.get(r), pos.get(i)); | |
//如果這個詞性存在 | |
if(idxWord!=null) { | |
//讀出字所有的sense, 並取其同義字 | |
for(int j=0;j<idxWord.getWordIDs().size();j++){ | |
IWordID wordID = idxWord.getWordIDs().get(j);//j-th sense | |
//取出j-th sense的字 | |
IWord word = dict.getWord(wordID); | |
//取出該word的同義字 | |
ISynset synset = word.getSynset(); | |
//針對這些同義字 | |
for(IWord w:synset.getWords()) | |
{ | |
String temp = w.getLemma().replaceAll("[,;:.?!'_-]"," "); | |
if(!tempList.contains(temp)) | |
{ | |
tempList.add(temp); | |
System.out.println(temp); | |
} | |
} | |
} | |
} | |
}//end 每一種詞性 | |
}//end synonym裡每一個字 | |
//針對取出來的同義字,把不重覆的擺回synosymList | |
for(int r=0;r<tempList.size();r++){ | |
if(!synonymList.contains(tempList.get(r))){ | |
synonymList.add(tempList.get(r)); | |
} | |
} | |
tempList.clear(); | |
}//end 掃幾層 | |
System.out.println(synonymList.size()); | |
} | |
} |
文章標籤
全站熱搜