王新阳

wangxinyang

PHPExcel使用过滤器按块读取excel

PHPExcel读取EXCEL非常消耗内存(尤其是上万条数据,很容易出现内存不足),可以使用分批、异步读取,思路:
1、获取表中记录总行数
2、每次读取N条写入数据库,并返回最后一条的行号
3、根据上次返回的行号使用AJAX等方式循环第2步
相应代码可从下方示例中分离

//加载PHPExcel类库
require_once FCPATH.'app/manage/libraries/PHPExcel/IOFactory.php';
//按块读取EXCEL,减少内存占用
class chunkReadFilter implements PHPExcel_Reader_IReadFilter{
	private $_startRow = 0;
	private $_endRow = 0;
	//设置要读取的块开始行、结束行
	public function setRows($startRow, $chunkSize) {
		$this->_startRow	= $startRow;
		$this->_endRow		= $startRow + $chunkSize;
	}
	//确定当前行是否读取
	public function readCell($column, $row, $worksheetName = '') {
		//读取首行(标题行)和块区间内的行数据
		//return (($row == 1) || ($row >= $this->_startRow && $row < $this->_endRow)) ? TRUE : FALSE;
		//只读取块区间内的行数据
		return ($row >= $this->_startRow && $row < $this->_endRow) ? TRUE : FALSE;
	}
}

//执行导入
function import_do($arr_fields){
	$excelfields = P('fields'); //要导入的字段和顺序
	$excelfile = P('excelfile'); //要导入的excel文件
	$skiplines = P('skiplines'); //跳过行数
	$deletefile = P('deletefile'); //导入成功后删除excel文件
		
	if(!$excelfields){
		return $this->_display('请添加要导入的字段');
	}else{
		$excelfields = explode('|', $excelfields);
	}
	if(! $excelfile)return $this->_display('请上传要导入的文件');
	is_lng($skiplines) OR $skiplines=0;
	$deletefile = $deletefile=='1' ? TRUE : FALSE;
		
	$time=$this->input->server('REQUEST_TIME');
		
	//设置超时时间为99秒
	set_time_limit(99);
		
	$excelfile = FCPATH.substr($excelfile, 1);
	$inputFileType = PHPExcel_IOFactory::identify($excelfile);
	$objReader = PHPExcel_IOFactory::createReader($inputFileType);
	$objReader->setReadDataOnly(true);
		
	$chunkSize = 500; //每次读取记录数
	$chunkFilter = new chunkReadFilter(); //实例化按块读取过滤器
	$objReader->setReadFilter($chunkFilter); //使用过滤器
	$chunkFilter->setRows(1,1); //从第一条开始读取1条,据此获取activeShieetIndex
	$objPHPExcel = $objReader->load($excelfile);
	$activeSheetIndex = $objPHPExcel->getActiveSheetIndex();
		
	$worksheetData = $objReader->listWorksheetInfo($excelfile); // 列出工作表列表
	$activeSheetInfo = $worksheetData[$activeSheetIndex];
/*
 * $activeSheetInfo
Array(
    [worksheetName] => 信息
    [lastColumnLetter] => AQ
    [lastColumnIndex] => 42
    [totalRows] => 21
    [totalColumns] => 43
)
*/
	for ($startRow = $skiplines+1+5000; $startRow <= $activeSheetInfo['totalRows']; $startRow += $chunkSize) {
		echo 'Loading WorkSheet using configurable filter for headings row 1 and for rows ',$startRow,' to ',($startRow+$chunkSize-1),'<br />';
		//设置本次循环要读取的记录
		$chunkFilter->setRows($startRow,$chunkSize);
		//根据过滤器读取数据
		$objPHPExcel = $objReader->load($excelfile);
		$sheetData = $objPHPExcel->getActiveSheet()->toArray(null,true,true,true);
		$sheetData = array_slice($sheetData, $startRow-1);
		//echo '<pre>';
		//print_r($sheetData);
	}
	echo memory_get_peak_usage(), ', ', memory_get_usage(), ', ', (microtime(true)-$time);
	exit;
}
2017-09-14
2024-05-03 星期五 农历三月二十五