基于iciba的英汉词典
通过对金山在线词典的分析,我发现它的词库里收录的单词的url都是以"http://www.iciba.com/" + 单词 的形式出现的。
之前我写过一段获取网页源代码的代码,在此基础上我写了一个从iciba上获取英文单词翻译的程序,这样以后想要查单词就不用打开浏览器那么麻烦了。
getWordName函数用于获得所查单词对应的实际单词,因为我要查的单词很有可能实际上并不存在,这时它就会跳转到另一个和这个单词比较相像的单词的url上。 getTranslation函数用于获得单词的中文翻译。
BufferedReader reader = new BufferedReader(new InputStreamReader(urlConnection.getInputStream(), "utf-8"));这句话最后添加的"utf-8"解决了获取网页源代码里面的中文乱码问题。
代码:
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Minimal English-to-Chinese dictionary client that scrapes the iciba.com word pages.
 *
 * <p>Every lookup downloads {@code http://www.iciba.com/<word>} and reads the HTML line by
 * line. The parsing depends on the exact page layout (a fixed line position for the word, and
 * specific tags for the translation), so results are only meaningful while the page keeps
 * that layout.
 */
public class EnglishChineseTranslater {

    /** Prefix of every word page; the looked-up word is appended verbatim. */
    private static final String BASE_URL = "http://www.iciba.com/";

    /** Decoding the page as UTF-8 keeps the Chinese text from being garbled. */
    private static final String PAGE_CHARSET = "utf-8";

    /** Number of lines that precede the line the word is extracted from. */
    private static final int LINES_BEFORE_WORD_LINE = 8;

    /** Number of characters at the end of the word line that are not part of the word. */
    private static final int WORD_LINE_TRAILER_LENGTH = 3;

    private static final String LABEL_LIST_OPEN = "<span class=\"label_list\">";
    private static final String LABEL_OPEN = "<label>";
    private static final String LABEL_CLOSE = "</label>";

    /**
     * Returns the word that the page for {@code word} actually describes.
     *
     * <p>The value is taken from the ninth line of the page: the text between the last
     * {@code '/'} and the three trailing characters of that line.
     * NOTE(review): presumably that line carries the page's own URL, so a redirected lookup
     * yields the word it was redirected to; confirm against the live page.
     *
     * @param word the word to look up; appended to the base URL without any encoding
     * @return the extracted word, or an empty string if the page has fewer than nine lines
     * @throws Exception if the URL is malformed or the page cannot be read
     */
    public static String getWordName(String word) throws Exception {
        HttpURLConnection urlConnection = openConnection(word);
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(urlConnection.getInputStream(), PAGE_CHARSET))) {
            return parseWordName(reader);
        } finally {
            // Release the underlying connection even when reading fails.
            urlConnection.disconnect();
        }
    }

    /**
     * Returns the Chinese translation shown on the page for {@code word}.
     *
     * <p>The result is the concatenation, without separators, of the contents of the
     * consecutive {@code <label>} lines that follow the first
     * {@code <span class="label_list">} line.
     *
     * @param word the word to look up; appended to the base URL without any encoding
     * @return the concatenated label texts, or an empty string if the page has no label list
     * @throws Exception if the URL is malformed or the page cannot be read
     */
    public static String getTranslation(String word) throws Exception {
        HttpURLConnection urlConnection = openConnection(word);
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(urlConnection.getInputStream(), PAGE_CHARSET))) {
            return parseTranslation(reader);
        } finally {
            // Release the underlying connection even when reading fails.
            urlConnection.disconnect();
        }
    }

    /**
     * Extracts the word from the ninth line supplied by {@code reader}.
     *
     * <p>Package-private so the parsing can be exercised without network access.
     *
     * @param reader source of the page text; not closed by this method
     * @return the extracted word, or an empty string if there is no ninth line or it is too
     *     short to contain a word
     * @throws IOException if reading from {@code reader} fails
     */
    static String parseWordName(BufferedReader reader) throws IOException {
        for (int i = 0; i < LINES_BEFORE_WORD_LINE; i++) {
            reader.readLine();
        }
        String line = reader.readLine();
        if (line == null) {
            // Page ended before the expected line; nothing to extract.
            return "";
        }

        int endExclusive = line.length() - WORD_LINE_TRAILER_LENGTH;
        if (endExclusive <= 1) {
            return "";
        }
        // The character at index 0 is never part of the word, so a missing slash (-1) and a
        // slash at index 0 both make the word start at index 1.
        int slash = line.lastIndexOf('/', endExclusive - 1);
        int start = Math.max(slash, 0) + 1;
        return line.substring(start, endExclusive);
    }

    /**
     * Collects the label texts that follow the first label-list marker line.
     *
     * <p>Package-private so the parsing can be exercised without network access.
     *
     * @param reader source of the page text; not closed by this method
     * @return the concatenated label texts, or an empty string if the marker line is absent
     * @throws IOException if reading from {@code reader} fails
     */
    static String parseTranslation(BufferedReader reader) throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            if (LABEL_LIST_OPEN.equals(line.trim())) {
                return readLabels(reader);
            }
        }
        return "";
    }

    /** Opens (without connecting yet) the HTTP connection for the page of {@code word}. */
    private static HttpURLConnection openConnection(String word) throws IOException {
        URL url = new URL(BASE_URL + word);
        return (HttpURLConnection) url.openConnection();
    }

    /** Concatenates the contents of consecutive {@code <label>} lines, stopping at the first other line. */
    private static String readLabels(BufferedReader reader) throws IOException {
        StringBuilder labels = new StringBuilder();
        String line;
        while ((line = reader.readLine()) != null) {
            String trimmed = line.trim();
            // startsWith is safe on blank or short lines, unlike substring(0, 7).
            if (!trimmed.startsWith(LABEL_OPEN)) {
                break;
            }
            int end = trimmed.endsWith(LABEL_CLOSE)
                    ? trimmed.length() - LABEL_CLOSE.length()
                    : trimmed.length();
            labels.append(trimmed, LABEL_OPEN.length(), Math.max(end, LABEL_OPEN.length()));
        }
        return labels.toString();
    }

    /**
     * Looks up one word and prints {@code <actual word>: <translation>}.
     *
     * @param args optional; the first argument is the word to look up, defaulting to "apple"
     * @throws Exception if either lookup fails
     */
    public static void main(String[] args) throws Exception {
        String word = args.length > 0 ? args[0] : "apple";
        String trueWord = getWordName(word);
        String translation = getTranslation(word);
        System.out.println(trueWord + ": " + translation);
    }
}