我们在做大数据量网站的时候,有时遇到原始数据量过大的情况,如果都做成一个网站会让服务器负载过高、用户打开很慢,这时往往需要分为多个子网站来做,原始数据也需要进行分割,以便分别导入。
下面是将1个美国数据文件分割为51个州级数据文件(50个州加华盛顿特区,州代码无法识别的数据另存到others.tsv)的例子usa2state.php:
<?php
/*
 * usa2state.php - split usa.tsv into one TSV file per state.
 *
 * Reads the tab-separated file usa.tsv from the current directory, copies its
 * header row into every output file, and then appends each data row to
 * "<code>.tsv" according to the two-letter state code found in column index
 * 10 (compared case-insensitively). Rows whose code is missing or is not in
 * the list below are written to others.tsv. One progress line is printed per
 * data row, followed by the total number of rows written.
 *
 * Throws RuntimeException if a file cannot be opened, the input has no header
 * row, or a write fails.
 */

// The whole-USA input file that is to be split.
$file_usa = "usa.tsv";

// Zero-based index of the column that holds the state code.
$state_column = 10;

// 50 states plus DC; each code is also the base name of its output file.
$states = [
    "ak", "al", "ar", "az", "ca", "co", "ct", "dc", "de", "fl", "ga",
    "hi", "ia", "id", "il", "in", "ks", "ky", "la", "ma", "md", "me",
    "mi", "mn", "mo", "ms", "mt", "nc", "nd", "ne", "nh", "nj", "nm",
    "nv", "ny", "oh", "ok", "or", "pa", "ri", "sc", "sd", "tn", "tx",
    "ut", "va", "vt", "wa", "wi", "wv", "wy",
];

// Catch-all output for rows that belong to none of the 51 codes above.
$file_others = "others.tsv";

// Open the input read-only.
$fp_usa = fopen($file_usa, "r");
if ($fp_usa === false) {
    throw new RuntimeException("Cannot open $file_usa for reading");
}

// Open every output write-only (truncating), keyed by state code.
$fp_by_state = [];
foreach ($states as $code) {
    $path = "$code.tsv";
    $fp = fopen($path, "w");
    if ($fp === false) {
        throw new RuntimeException("Cannot open $path for writing");
    }
    $fp_by_state[$code] = $fp;
}
$fp_others = fopen($file_others, "w");
if ($fp_others === false) {
    throw new RuntimeException("Cannot open $file_others for writing");
}

// chr(0) as the enclosure effectively disables quote handling, so fields are
// passed through verbatim. The escape character is spelled out (it equals the
// historical default) because newer PHP versions deprecate relying on it.
$line_array = fgetcsv($fp_usa, 0, "\t", chr(0), "\\"); // first row = header
if ($line_array === false) {
    throw new RuntimeException("$file_usa is empty: no header row found");
}

// implode() takes the separator first; the reversed legacy order used
// previously is no longer accepted by PHP 8.
$header = implode("\t", $line_array) . "\n";
foreach ($fp_by_state as $code => $fp) {
    if (fwrite($fp, $header) === false) {
        throw new RuntimeException("Cannot write header to $code.tsv");
    }
}
if (fwrite($fp_others, $header) === false) {
    throw new RuntimeException("Cannot write header to $file_others");
}

$count_line = 0;

// Loop on fgetcsv()'s own result instead of feof(): feof() only turns true
// after a read has already failed, which used to produce one bogus extra row.
while (($line_array = fgetcsv($fp_usa, 0, "\t", chr(0), "\\")) !== false) {
    // A blank input line comes back as [null]; it carries no data, skip it.
    if ($line_array === [null]) {
        continue;
    }

    // Read the state code; short rows have none and fall through to "others".
    $state = isset($line_array[$state_column])
        ? strtolower($line_array[$state_column])
        : "";

    // Route the row to its state's file, or to the catch-all file.
    $fp = isset($fp_by_state[$state]) ? $fp_by_state[$state] : $fp_others;
    if (fwrite($fp, implode("\t", $line_array) . "\n") === false) {
        throw new RuntimeException("Cannot write row for state '$state'");
    }

    $count_line++;
    print "\n$count_line : $state\n";
}

print "total=$count_line\n";

fclose($fp_usa);
foreach ($fp_by_state as $fp) {
    fclose($fp);
}
fclose($fp_others);
?>
程序可以用但写得很丑,没有用数组、循环或者函数子程序什么的,简单可用而已,呵呵。
评论