大文本文件分割成若干小文件的PHP程序

By James Qi, 2013年8月29日
　　我們在做大數據量網站的時候，有時遇到原始數據量過大的情況，如果都做成一個網站會讓服務器負載過高、用戶打開很慢，這時往往需要分為多個子網站來做，原始數據也需要進行分割，以便分別導入。
　　下面是將1個美國數據文件分割為51個州級數據文件（50個州加華盛頓特區，另有一個others.tsv存放其餘數據）的例子usa2state.php：
<?php
/*
 * usa2state.php -- split usa.tsv into one TSV file per state.
 *
 * Reads the tab-separated file usa.tsv from the current directory. The first
 * line is treated as the header and copied to every output file. Each later
 * row is appended to "<state>.tsv", where <state> is the lower-cased value of
 * column 10 (zero-based). Rows whose state value is missing or not in the
 * list below are written to others.tsv.
 *
 * Progress ("<n> : <state>") is printed for every row, followed by the total.
 */

$file_usa = "usa.tsv"; // the nationwide input file to split

// Output files: the 50 states plus DC. Every listed code gets its own file.
$state_codes = [
    "ak", "al", "ar", "az", "ca", "co", "ct", "dc", "de", "fl", "ga",
    "hi", "ia", "id", "il", "in", "ks", "ky", "la", "ma", "md", "me",
    "mi", "mn", "mo", "ms", "mt", "nc", "nd", "ne", "nh", "nj", "nm",
    "nv", "ny", "oh", "ok", "or", "pa", "ri", "sc", "sd", "tn", "tx",
    "ut", "va", "vt", "wa", "wi", "wv", "wy",
];
$others_key = "others"; // catch-all output for rows outside the list above
$state_column = 10;     // zero-based index of the column holding the state code

$fp_usa = fopen($file_usa, "r"); // input is opened read-only
if ($fp_usa === false) {
    throw new RuntimeException("Cannot open input file: " . $file_usa);
}

// Open every output up front (write mode, truncating) so each state gets a
// file with a header even when no row belongs to it.
$fp_out = [];
foreach (array_merge($state_codes, [$others_key]) as $code) {
    $file_out = $code . ".tsv";
    $fp = fopen($file_out, "w");
    if ($fp === false) {
        throw new RuntimeException("Cannot open output file: " . $file_out);
    }
    $fp_out[$code] = $fp;
}

// chr(0) as the enclosure effectively disables quote handling, so fields are
// split on tabs only. "\\" is fgetcsv()'s default escape, passed explicitly.
$header = fgetcsv($fp_usa, 0, "\t", chr(0), "\\"); // first line = header
if ($header !== false) {
    $header_line = implode("\t", $header) . "\n";
    foreach ($fp_out as $code => $fp) {
        if (fwrite($fp, $header_line) === false) {
            throw new RuntimeException("Cannot write header to " . $code . ".tsv");
        }
    }
}

$count_line = 0;

// Loop on fgetcsv()'s return value rather than feof(): feof() only becomes
// true after a read has already failed, which would process one phantom row.
while (($line_array = fgetcsv($fp_usa, 0, "\t", chr(0), "\\")) !== false) {
    if ($line_array === [null]) {
        continue; // fgetcsv() reports a blank line as [null]; nothing to route
    }

    // Short rows have no state column; treat them as "unknown state".
    $state = isset($line_array[$state_column])
        ? strtolower($line_array[$state_column])
        : "";
    $target = isset($fp_out[$state]) ? $state : $others_key;

    if (fwrite($fp_out[$target], implode("\t", $line_array) . "\n") === false) {
        throw new RuntimeException("Cannot write row to " . $target . ".tsv");
    }

    $count_line++;
    print "\n$count_line : $state\n";
}
print "total=$count_line\n";

fclose($fp_usa);
foreach ($fp_out as $fp) {
    fclose($fp);
}

?>
　　程序可以用但寫得很醜，沒有用數組、循環或者函數子程序什麼的，簡單可用而已，呵呵。
自由標籤
文件
PHP
您的名字
CAPTCHA
此问题用于测试您是否是人类访问者并防止自动提交垃圾信息。
大文本文件分割成若干小文件的PHP程序

评论

Plain text

大文本文件分割成若干小文件的PHP程序

评论

Plain text

站内搜索