package ma;
import java.io.*;
import java.util.*;
import java.util.regex.*;
import cc.mallet.pipe.*;
import cc.mallet.pipe.iterator.*;
import cc.mallet.types.*;
/**
 * Minimal MALLET import example: builds a {@link SerialPipes} pipeline that
 * turns raw text input into a labelled feature vector.
 *
 * <p>The pipeline is assembled once, when an instance is constructed, and is
 * kept in {@link #pipe}. Nothing in this class feeds data through the pipe.
 */
public class pro {
    /**
     * Token pattern: one or more Unicode letters ({@code \p{L}}), Unicode
     * digits ({@code \p{N}}) or underscores. Compiled once and shared, since
     * {@link Pattern} instances are immutable.
     */
    private static final Pattern TOKEN_PATTERN = Pattern.compile("[\\p{L}\\p{N}_]+");

    /** The processing pipeline produced by {@link #buildPipe()}. */
    Pipe pipe;

    /**
     * Entry point: constructs a {@code pro} instance, which builds the pipeline.
     *
     * @param args command-line arguments (unused)
     * @throws IOException declared for callers; nothing in the visible code throws it
     */
    public static void main(String[] args) throws IOException {
        new pro();
    }

    /** Creates the instance and builds its pipeline. */
    public pro() {
        // NOTE(review): buildPipe() is public and overridable; a subclass
        // override would run before the subclass's own fields are initialised.
        pipe = buildPipe();
    }

    /**
     * Assembles the pipeline. Stages run in the order they are added.
     *
     * @return a new {@link SerialPipes} wrapping the stages listed below
     */
    public Pipe buildPipe() {
        ArrayList<Pipe> pipeList = new ArrayList<Pipe>();

        // Convert the raw input to a character sequence, decoding as UTF-8.
        pipeList.add(new Input2CharSequence("UTF-8"));

        // Split the character sequence into tokens matching TOKEN_PATTERN.
        pipeList.add(new CharSequence2TokenSequence(TOKEN_PATTERN));

        // Lowercase every token.
        pipeList.add(new TokenSequenceLowercase());

        // Drop stopwords. Per the MALLET API the two flags are
        // (caseSensitive, markDeletions); both are disabled here.
        pipeList.add(new TokenSequenceRemoveStopwords(false, false));

        // Map tokens to feature indices.
        pipeList.add(new TokenSequence2FeatureSequence());

        // Convert the instance target to a Label.
        pipeList.add(new Target2Label());

        // Collapse the feature sequence into a feature vector.
        pipeList.add(new FeatureSequence2FeatureVector());

        // Print the processed input and target for inspection.
        pipeList.add(new PrintInputAndTarget());

        return new SerialPipes(pipeList);
    }
}
// Last updated: 2016-05-03 05:05