csvparser.class.inc.php 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. <?php
  2. // Copyright (C) 2010 Combodo SARL
  3. //
  4. // This program is free software; you can redistribute it and/or modify
  5. // it under the terms of the GNU General Public License as published by
  6. // the Free Software Foundation; version 3 of the License.
  7. //
  8. // This program is distributed in the hope that it will be useful,
  9. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. // GNU General Public License for more details.
  12. //
  13. // You should have received a copy of the GNU General Public License
  14. // along with this program; if not, write to the Free Software
  15. // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  16. /**
  17. * CSV parser
  18. *
  19. * @author Erwan Taloc <erwan.taloc@combodo.com>
  20. * @author Romain Quetiez <romain.quetiez@combodo.com>
  21. * @author Denis Flaven <denis.flaven@combodo.com>
  22. * @license http://www.opensource.org/licenses/gpl-3.0.html LGPL
  23. */
  24. class CSVParserException extends CoreException
  25. {
  26. }
  27. define('stSTARTING', 1); //grey zone: the type is undetermined
  28. define('stRAW', 2); //building a non-qualified string
  29. define('stQUALIFIED', 3); //building qualified string
  30. define('stESCAPED', 4); //just encountered an escape char
  31. define('evBLANK', 0);
  32. define('evSEPARATOR', 1);
  33. define('evNEWLINE', 2);
  34. define('evTEXTQUAL', 3); // used for escaping as well
  35. define('evOTHERCHAR', 4);
  36. /**
  37. * CSVParser
  38. *
  39. * @package iTopORM
  40. */
  41. class CSVParser
  42. {
  43. private $m_sCSVData;
  44. private $m_sSep;
  45. private $m_sTextQualifier;
  46. public function __construct($sTxt, $sSep = ',', $sTextQualifier = '"')
  47. {
  48. $this->m_sCSVData = str_replace("\r\n", "\n", $sTxt);
  49. $this->m_sSep = $sSep;
  50. $this->m_sTextQualifier = $sTextQualifier;
  51. }
  52. protected $m_sCurrCell = '';
  53. protected $m_aCurrRow = array();
  54. protected $m_iToSkip = 0;
  55. protected $m_aDataSet = array();
  56. protected function __AddChar($c)
  57. {
  58. $this->m_sCurrCell .= $c;
  59. }
  60. protected function __ClearCell()
  61. {
  62. $this->m_sCurrCell = '';
  63. }
  64. protected function __AddCell($c = null, $aFieldMap = null, $bTrimSpaces = false)
  65. {
  66. if ($bTrimSpaces)
  67. {
  68. $sCell = trim($this->m_sCurrCell);
  69. }
  70. else
  71. {
  72. $sCell = $this->m_sCurrCell;
  73. }
  74. if (!is_null($aFieldMap))
  75. {
  76. $iNextCol = count($this->m_aCurrRow);
  77. $iNextName = $aFieldMap[$iNextCol];
  78. $this->m_aCurrRow[$iNextName] = $sCell;
  79. }
  80. else
  81. {
  82. $this->m_aCurrRow[] = $sCell;
  83. }
  84. $this->m_sCurrCell = '';
  85. }
  86. protected function __AddRow($c = null, $aFieldMap = null, $bTrimSpaces = false)
  87. {
  88. $this->__AddCell($c, $aFieldMap, $bTrimSpaces);
  89. if ($this->m_iToSkip > 0)
  90. {
  91. $this->m_iToSkip--;
  92. }
  93. elseif (count($this->m_aCurrRow) > 1)
  94. {
  95. $this->m_aDataSet[] = $this->m_aCurrRow;
  96. }
  97. elseif (count($this->m_aCurrRow) == 1)
  98. {
  99. // Get the unique value
  100. $aValues = array_values($this->m_aCurrRow);
  101. $sValue = $aValues[0];
  102. if (strlen($sValue) > 0)
  103. {
  104. $this->m_aDataSet[] = $this->m_aCurrRow;
  105. }
  106. }
  107. else
  108. {
  109. // blank line, skip silently
  110. }
  111. $this->m_aCurrRow = array();
  112. }
  113. protected function __AddCellTrimmed($c = null, $aFieldMap = null)
  114. {
  115. $this->__AddCell($c, $aFieldMap, true);
  116. }
  117. protected function __AddRowTrimmed($c = null, $aFieldMap = null)
  118. {
  119. $this->__AddRow($c, $aFieldMap, true);
  120. }
  121. function ToArray($iToSkip = 1, $aFieldMap = null, $iMax = 0)
  122. {
  123. $aTransitions = array();
  124. $aTransitions[stSTARTING][evBLANK] = array('', stSTARTING);
  125. $aTransitions[stSTARTING][evSEPARATOR] = array('__AddCell', stSTARTING);
  126. $aTransitions[stSTARTING][evNEWLINE] = array('__AddRow', stSTARTING);
  127. $aTransitions[stSTARTING][evTEXTQUAL] = array('', stQUALIFIED);
  128. $aTransitions[stSTARTING][evOTHERCHAR] = array('__AddChar', stRAW);
  129. $aTransitions[stRAW][evBLANK] = array('__AddChar', stRAW);
  130. $aTransitions[stRAW][evSEPARATOR] = array('__AddCellTrimmed', stSTARTING);
  131. $aTransitions[stRAW][evNEWLINE] = array('__AddRowTrimmed', stSTARTING);
  132. $aTransitions[stRAW][evTEXTQUAL] = array('__AddChar', stRAW);
  133. $aTransitions[stRAW][evOTHERCHAR] = array('__AddChar', stRAW);
  134. $aTransitions[stQUALIFIED][evBLANK] = array('__AddChar', stQUALIFIED);
  135. $aTransitions[stQUALIFIED][evSEPARATOR] = array('__AddChar', stQUALIFIED);
  136. $aTransitions[stQUALIFIED][evNEWLINE] = array('__AddChar', stQUALIFIED);
  137. $aTransitions[stQUALIFIED][evTEXTQUAL] = array('', stESCAPED);
  138. $aTransitions[stQUALIFIED][evOTHERCHAR] = array('__AddChar', stQUALIFIED);
  139. $aTransitions[stESCAPED][evBLANK] = array('', stESCAPED);
  140. $aTransitions[stESCAPED][evSEPARATOR] = array('__AddCell', stSTARTING);
  141. $aTransitions[stESCAPED][evNEWLINE] = array('__AddRow', stSTARTING);
  142. $aTransitions[stESCAPED][evTEXTQUAL] = array('__AddChar', stQUALIFIED);
  143. $aTransitions[stESCAPED][evOTHERCHAR] = array('__AddChar', stSTARTING);
  144. // Reset parser variables
  145. $this->m_sCurrCell = '';
  146. $this->m_aCurrRow = array();
  147. $this->m_iToSkip = $iToSkip;
  148. $this->m_aDataSet = array();
  149. $iState = stSTARTING;
  150. for($i = 0; $i < strlen($this->m_sCSVData) ; $i++)
  151. {
  152. $c = $this->m_sCSVData[$i];
  153. // // Note: I did that because the unit test was not working fine (file edited with notepad: \n chars padded :-(
  154. // if (ord($c) == 0) continue;
  155. if ($c == $this->m_sSep)
  156. {
  157. $iEvent = evSEPARATOR;
  158. }
  159. elseif ($c == ' ')
  160. {
  161. $iEvent = evBLANK;
  162. }
  163. elseif ($c == "\t")
  164. {
  165. $iEvent = evBLANK;
  166. }
  167. elseif ($c == "\n")
  168. {
  169. $iEvent = evNEWLINE;
  170. }
  171. elseif ($c == $this->m_sTextQualifier)
  172. {
  173. $iEvent = evTEXTQUAL;
  174. }
  175. else
  176. {
  177. $iEvent = evOTHERCHAR;
  178. }
  179. $sAction = $aTransitions[$iState][$iEvent][0];
  180. $iState = $aTransitions[$iState][$iEvent][1];
  181. if (!empty($sAction))
  182. {
  183. $aCallSpec = array($this, $sAction);
  184. if (is_callable($aCallSpec))
  185. {
  186. call_user_func($aCallSpec, $c, $aFieldMap);
  187. }
  188. else
  189. {
  190. throw new CSVParserException("CSVParser: unknown verb '$sAction'");
  191. }
  192. }
  193. $iLineCount = count($this->m_aDataSet);
  194. if (($iMax > 0) && ($iLineCount >= $iMax)) break;
  195. }
  196. // Close the final line
  197. $this->__AddRow(null, $aFieldMap);
  198. return $this->m_aDataSet;
  199. }
  200. public function ListFields()
  201. {
  202. $aHeader = $this->ToArray(0, null, 1);
  203. return $aHeader[0];
  204. }
  205. }
  206. ?>