package ie.dit.comp.lukejia.fyp.swn; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.util.HashMap; import java.util.Iterator; import java.util.Set; import java.util.Vector; public class SWN3 { private String pathToSWN = "D:\study\Quarter1\IRDM\Project\resource\SWN.txt"; private HashMap<String, Double> _dict; public SWN3(){ _dict = new HashMap<String, Double>(); HashMap<String, Vector<Double>> _temp = new HashMap<String, Vector<Double>>(); try{ BufferedReader csv = new BufferedReader(new FileReader(pathToSWN)); String line = ""; while((line = csv.readLine()) != null) { String[] data = line.split("\t"); Double score = Double.parseDouble(data[2])-Double.parseDouble(data[3]); String[] words = data[4].split(" "); for(String w:words) { String[] w_n = w.split("#"); w_n[0] += "#"+data[0]; int index = Integer.parseInt(w_n[1])-1; if(_temp.containsKey(w_n[0])) { Vector<Double> v = _temp.get(w_n[0]); if(index>v.size()) for(int i = v.size();i<index; i++) v.add(0.0); v.add(index, score); _temp.put(w_n[0], v); } else { Vector<Double> v = new Vector<Double>(); for(int i = 0;i<index; i++) v.add(0.0); v.add(index, score); _temp.put(w_n[0], v); } } } Set<String> temp = _temp.keySet(); for (Iterator<String> iterator = temp.iterator(); iterator.hasNext();) { String word = (String) iterator.next(); Vector<Double> v = _temp.get(word); double score = 0.0; double sum = 0.0; for(int i = 0; i < v.size(); i++) score += ((double)1/(double)(i+1))*v.get(i); for(int i = 1; i<=v.size(); i++) sum += (double)1/(double)i; score /= sum; String sent = ""; if(score>=0.75) sent = "strong_positive"; else if(score > 0.25 && score<=0.5) sent = "positive"; else if(score > 0 && score>=0.25) sent = "weak_positive"; else if(score < 0 && score>=-0.25) sent = "weak_negative"; else if(score < -0.25 && score>=-0.5) sent = "negative"; else if(score<=-0.75) sent = "strong_negative"; _dict.put(word, score); } } catch(Exception e){e.printStackTrace();} } public Double extract(String word) { Double total = new Double(0); if(_dict.get(word+"#n") != null) total = _dict.get(word+"#n") + total; if(_dict.get(word+"#a") != null) total = _dict.get(word+"#a") + total; if(_dict.get(word+"#r") != null) total = _dict.get(word+"#r") + total; if(_dict.get(word+"#v") != null) total = _dict.get(word+"#v") + total; return total; } public static void main(String[] args) { SWN3 test = new SWN3(); String sentence="Hello have a Super awesome great day"; String[] words = sentence.split("\s+"); double totalScore = 0; for(String word : words) { word = word.replaceAll("([^a-zA-Z\s])", ""); if (test.extract(word) == null) continue; totalScore += test.extract(word); } System.out.println(totalScore); } } 报错 |
|
检查下
String[] data = line.split(“\t”); System.out.println(data.length); Double score = Double.parseDouble(data[2])-Double.parseDouble(data[3]); |
|
我调试了, 这个结果是1 应该没有越界啊? |
|
长度是1 ,确要取 data[2] 就越界了啊 |
|
那是什么情况? 我总不能不取吧? 你熟悉这个么? 是进行语义分析方面的 |
|
10分 |
不熟悉你的业务,语义场景 |
谢啦啊 不过这样我感觉就失去意义了 |
|
10分 |
当读到第31行时就不想再读下去了,data[4],w_n[1],这些写法是一定要保护的。
首先你认为你的数据被split开之后,一定是有多少项或者至少有多少项,要加一个判断的: if (data == null || data.length < 5) { //或者返回,或者抛出一个有意义的异常 throw new IllegalStateException("too short data length " + data == null ? "<null>" : "" + data.length); } if (w_n == null || w_n.length < 2) { throw new IllegalStateException("too short w_n length " + w_n == null ? "<null>" : "" + w_n.length); } 如果有必须的话,在抛出的异常信息里把当前行是第几行,当前行的内容是什么都包括进去,这样在debug时一下子知道是因为读到了文件里的哪一行出的问题,然后那一行的内容也在,可以对这一行单独分析为什么出问题,比如你按\t去split的,也许那一条的某一个分隔就不是\t,而是空格之类的,或者那行的数据里就是包括了一个\t,打乱了你对数据位置的假设,你肉眼看不出来,但程序就是出错了。 另外,不管你是不是对你的输入数据有信心,程序上对null和arr.length的保护总是有必要的,起码会在出问题时提供debug所需的第一手信息。 |
额 谢啦 我写代码不是很多 不过问题已经解决了 确实是没用判断长度的原因 |