package ball;
import tool.*;
import java.io.*;
/**
 * Batch exporter: for every entry of the index file {@code data.csv} that
 * carries a code, fetches {@code https://ja.wikipedia.org/<link>}, cuts out the
 * lines between the last "年度別打撃成績" heading and the next {@code </table>},
 * and writes the table as comma-separated rows to {@code data/<code>.csv}.
 *
 * <p>All work is done by the constructor; {@link #main(String[])} merely
 * creates an instance. Reading and fetching are delegated to
 * {@code tool.readfile} and {@code tool.getdata}.
 * NOTE(review): both helpers are assumed to expose 1-based {@code data}
 * arrays with {@code datanumber} as the last valid index -- confirm.
 */
public class pro {
    /** Encoding used for the index file, the fetched pages and the CSV output. */
    private static final String ENCODING = "UTF-8";
    /** Heading text that marks the start of the table to extract. */
    private static final String HEADING = "年度別打撃成績";
    /** The table is only extracted when the heading is found after this line. */
    private static final int MIN_LINE_FOR_EXTRACT = 5;
    /**
     * A file is written when the heading is found after this line.
     * NOTE(review): differs from MIN_LINE_FOR_EXTRACT, so a heading on line 4
     * or 5 yields a file with a single empty row. Kept as in the original;
     * confirm whether one threshold was intended.
     */
    private static final int MIN_LINE_FOR_WRITE = 3;

    /** Retained for compatibility; no longer used as a shared loop counter. */
    int s;
    /** Line count reported by the most recent page fetch. */
    int datanumber;
    /** Lines of the index file first, later the lines of the page being processed. */
    String[] data = new String[5000];
    /** Rows of the table currently being exported; valid entries are 0..number-1. */
    String[] subdata = new String[700];
    /** Number of valid rows in {@link #subdata}. */
    int number;
    /** Output code per index row (column 8 of data.csv), "" when absent. */
    String[] code = new String[5000];
    /** Page path per index row (column 7 of data.csv), "" when absent. */
    String[] link = new String[5000];
    /** Line count reported for the index file. */
    int number_sub;

    public static void main(String[] args) {
        new pro();
    }

    /** Loads the index file and exports one CSV per index row that has a code. */
    pro() {
        readfile index = new readfile();
        index.makedata("data.csv", ENCODING);
        data = index.data;
        number_sub = index.datanumber;
        loadIndex();
        for (int tr = 2; tr <= number_sub; tr++) {
            if (code[tr].length() > 0) {
                exportPlayer(tr);
            }
        }
    }

    /** Fills {@link #link} and {@link #code} from index rows 2..number_sub. */
    private void loadIndex() {
        // Grow the lookup arrays instead of overflowing on large index files.
        if (number_sub >= link.length) {
            link = new String[number_sub + 1];
            code = new String[number_sub + 1];
        }
        for (int row = 2; row <= number_sub; row++) {
            link[row] = "";
            code[row] = "";
            String line = data[row];
            if (line == null) {
                continue;
            }
            String[] px = line.split(",");
            // Both columns are taken only when the row has more than 8 columns.
            if (px.length > 8) {
                link[row] = px[7];
                code[row] = px[8];
            }
        }
    }

    /** Fetches the page for index row {@code tr} and writes its table to data/&lt;code&gt;.csv. */
    private void exportPlayer(int tr) {
        getdata page = new getdata();
        page.makedata("https://ja.wikipedia.org/" + link[tr], ENCODING);
        data = page.data;
        datanumber = page.datanumber;

        // Last line mentioning the heading; stays 1 when the heading is absent.
        int headingLine = 1;
        for (int i = 1; i <= datanumber; i++) {
            if (contains(data[i], HEADING)) {
                headingLine = i;
            }
        }
        // First line at or after the heading that closes a table; 0 when none.
        int tableEnd = 0;
        for (int i = headingLine; i <= datanumber && tableEnd == 0; i++) {
            if (contains(data[i], "</table>")) {
                tableEnd = i;
            }
        }

        StringBuilder html = new StringBuilder();
        if (headingLine > MIN_LINE_FOR_EXTRACT) {
            for (int i = headingLine; i < tableEnd; i++) {
                if (data[i] != null) {
                    html.append(data[i]);
                }
            }
        }
        splitIntoRows(html.toString());

        if (headingLine > MIN_LINE_FOR_WRITE) {
            writexml("data/" + code[tr] + ".csv");
        }
    }

    /** Splits table markup into rows and cells and stores the cleaned rows in {@link #subdata}. */
    private void splitIntoRows(String html) {
        // Only the closing tag matters for cell splitting; opening tags are
        // stripped by clean_str. Replacing every "th" would corrupt cell text.
        String[] rows = html.replace("</th>", "</td>").split("</tr>");
        number = rows.length;
        if (rows.length > subdata.length) {
            subdata = new String[rows.length];
        }
        for (int r = 0; r < rows.length; r++) {
            String[] cells = rows[r].split("</td>");
            StringBuilder line = new StringBuilder();
            // split() drops trailing empty strings, so cells may be empty.
            for (int c = 0; c < cells.length; c++) {
                if (c > 0) {
                    line.append(',');
                }
                line.append(clean_str(cells[c]));
            }
            subdata[r] = line.toString();
        }
    }

    /** Null-safe substring test. */
    private static boolean contains(String line, String needle) {
        return line != null && line.indexOf(needle) > -1;
    }

    /**
     * Writes rows 0..number-1 of {@link #subdata} to {@code file}, one per line,
     * encoded as UTF-8, and reports success or failure on standard output.
     *
     * @param file path of the file to create or overwrite
     */
    void writexml(String file) {
        // Exactly `number` rows are valid; the former `number + 1` bound emitted
        // a trailing "null" line or a stale row from a previous table.
        int rowCount = Math.min(number, subdata.length);
        boolean failed;
        try (FileOutputStream out = new FileOutputStream(file);
             PrintWriter pw = new PrintWriter(
                     new BufferedWriter(new OutputStreamWriter(out, ENCODING)))) {
            for (int i = 0; i < rowCount; i++) {
                pw.println(subdata[i]);
            }
            // PrintWriter swallows write errors; checkError() flushes and reports them.
            failed = pw.checkError();
        } catch (IOException ep) {
            System.out.println("入出力エラーです。");
            return;
        }
        if (failed) {
            System.out.println("入出力エラーです。");
        } else {
            System.out.println("ファイルに書きこみました。");
        }
    }

    /**
     * Strips markup from one table-cell fragment: removes every tag, then
     * non-breaking-space entities, spaces and semicolons.
     *
     * @param str raw cell fragment
     * @return the fragment without tags, spaces or semicolons
     */
    String clean_str(String str) {
        String current = str;
        String next = fresh(current);
        // fresh() returns its input unchanged once nothing is left to strip,
        // and otherwise returns a strictly shorter string, so this terminates.
        while (!next.equals(current)) {
            current = next;
            next = fresh(current);
        }
        // NOTE(review): the original replaced " " twice; presumably one of them
        // was meant for non-breaking spaces -- confirm. Entities are removed
        // whole so that no "&nbsp" residue survives the ';' removal below.
        current = current.replace("&nbsp;", "");
        current = current.replace("&#160;", "");
        current = current.replace(" ", "");
        current = current.replace("\u00A0", "");
        current = current.replace(";", "");
        return current;
    }

    /**
     * Removes every occurrence of the first {@code <...>} tag found in {@code str}.
     *
     * @param str text possibly containing tags
     * @return {@code str} without that tag, or {@code str} unchanged when it has
     *         no {@code <} or the first {@code <} is never closed by {@code >}
     */
    String fresh(String str) {
        int open = str.indexOf("<");
        if (open < 0) {
            return str;
        }
        int close = str.indexOf(">", open);
        if (close < 0) {
            // Unterminated tag: leave the text alone instead of throwing.
            return str;
        }
        String tag = str.substring(open, close + 1);
        return str.replace(tag, "");
    }
}
// Last updated: 2017-08-21 09:14