PHP解析Android APK包的XML文件[转]

jerry PHP 2015年11月23日 收藏

2012.07.05更新: 某些系统apk文件的字符串表不是使用双字节,会导致解码越界. 修改了getStringTab()成员函数, 详细请看代码部分.

最近做的一个项目需要用PHP直接读取apk包中的信息,如:包名,入口地址,版本号等信息。这些信息都保存在apk包中的AndroidManifest.xml文件中。但是在编译apk包的过程中,所有XML文件都经过了压缩编码,直接读取xml文件内容是无法解析出需要的信息的。

Google搜索了一下没有找到这个功能的PHP实现,只有找到一个Java工具AXMLPrinter,可以还原经过压缩后的XML文件内容。于是就参考了这个Java工具的源码,按照这个压缩后的XML文件格式写了一个PHP的还原功能类。

类实现了直接读取apk包中指定的XML文件内容(使用PHP的zip功能函数直接解压apk包),无需手动解压apk获取文件。

类功能不多,所有供外部调用的方法都标明为公开(public),私有函数均是解码函数,不推荐直接调用。

使用例子:

  <?php
  // Usage example: decode AndroidManifest.xml straight out of an APK file.
  require('apk_parser.php');

  $p = new ApkParser();

  // open() returns false when the archive cannot be opened, the XML entry is
  // missing/empty, or the binary XML cannot be parsed - check it before
  // reading anything from the parser.
  if ($p->open('android_app.apk')) {
      echo $p->getXML();
  } else {
      echo 'Failed to parse android_app.apk';
  }
  ?>

apk_parser.php源代码:

  1. <?php
  2. /******************************************************
  3. * Android APK File Parser
  4. * Author: Katana
  5. * Version: v0.1
  6. * Web: http://www.win-ing.cn
  7. *
  8. * 功能:解析安卓apk包中的压缩XML文件,还原和读取XML内容
  9. *
  10. * 依赖功能:需要PHP的ZIP包函数支持。
  11. ******************************************************/
  12.  
  /**
   * Decoder for the compiled ("binary") XML files found inside Android APK
   * packages, such as AndroidManifest.xml.
   *
   * Typical use: call open() with the path of an APK (or parseString() with the
   * raw bytes of a compiled XML file), then read the result with getXML() or
   * the attribute getters.
   *
   * Requires the zip extension (ZipArchive) for open() and the mbstring
   * extension for decoding UTF-16 string pools.
   */
  class ApkParser{
      //----------------------
      // Chunk type constants (32-bit little-endian value at the start of a chunk)
      //----------------------
      const AXML_FILE = 0x00080003;
      const STRING_BLOCK = 0x001C0001;
      const RESOURCEIDS = 0x00080180;
      const START_NAMESPACE = 0x00100100;
      const END_NAMESPACE = 0x00100101;
      const START_TAG = 0x00100102;
      const END_TAG = 0x00100103;
      const TEXT = 0x00100104;

      //----------------------
      // Attribute value types (top byte of the value-type word)
      //----------------------
      const TYPE_NULL =0;
      const TYPE_REFERENCE =1;
      const TYPE_ATTRIBUTE =2;
      const TYPE_STRING =3;
      const TYPE_FLOAT =4;
      const TYPE_DIMENSION =5;
      const TYPE_FRACTION =6;
      const TYPE_INT_DEC =16;
      const TYPE_INT_HEX =17;
      const TYPE_INT_BOOLEAN =18;
      const TYPE_INT_COLOR_ARGB8 =28;
      const TYPE_INT_COLOR_RGB8 =29;
      const TYPE_INT_COLOR_ARGB4 =30;
      const TYPE_INT_COLOR_RGB4 =31;

      const UNIT_MASK = 15;
      // String pool header flag: strings are stored as UTF-8 instead of UTF-16LE.
      const UTF8_FLAG = 0x00000100;

      private static $RADIX_MULTS = array(0.00390625, 3.051758E-005, 1.192093E-007, 4.656613E-010);
      private static $DIMENSION_UNITS = array("px","dip","sp","pt","in","mm","","");
      private static $FRACTION_UNITS = array("%","%p","","","","","","");

      private $xml='';                  // bytes not consumed yet
      private $length = 0;              // strlen($xml), kept in sync by skip()
      private $stringCount = 0;
      private $styleCount = 0;
      private $stringTab = array();     // decoded string pool (UTF-8)
      private $styleTab = array();      // always empty, see parseBlock()
      private $resourceIDs = array();
      private $ns = array(array());     // namespace frames, see parseBlock()
      private $root = NULL;             // parsed node tree
      private $line = 0;                // last source line emitted by getXML()

      //----------------------
      // Public API
      //----------------------

      /**
       * Reads a compiled XML file out of an APK (zip) archive and parses it.
       *
       * @param string $apk_file Path of the APK file.
       * @param string $xml_file Name of the archive entry to decode.
       * @return bool true on success; false when the archive cannot be opened,
       *              the entry is missing or empty, or the data is malformed.
       */
      public function open($apk_file, $xml_file='AndroidManifest.xml'){
          $zip = new ZipArchive;
          if ($zip->open($apk_file) !== TRUE) return false;

          $xml = $zip->getFromName($xml_file);
          $zip->close();
          if (!$xml) return false;

          try {
              return $this->parseString($xml);
          }catch (Exception $e){
              // Deliberate best effort: malformed input is reported as false.
              return false;
          }
      }

      /**
       * Parses the raw bytes of a compiled XML file.
       *
       * Any state left over from a previous parse is discarded first, so a
       * failed parse leaves the parser empty rather than half updated.
       *
       * @param string $xml Binary XML data.
       * @return bool Always true; errors are reported by exception.
       * @throws Exception When the data is truncated or not valid binary XML.
       */
      public function parseString($xml){
          $this->xml = $xml;
          $this->length = strlen($xml);
          $this->stringCount = 0;
          $this->styleCount = 0;
          $this->stringTab = array();
          $this->styleTab = array();
          $this->resourceIDs = array();
          $this->ns = array(array());
          $this->root = NULL;
          $this->line = 0;

          $this->root = $this->parseBlock(self::AXML_FILE);
          return true;
      }

      /**
       * Renders the parsed document (or a sub-tree) as XML text.
       *
       * A node is put on a new line, indented by its depth, whenever its
       * recorded source line differs from the previously emitted node.
       *
       * @param array|null $node Node to render; ignored (the document root is
       *                         used) while $lv is left at -1.
       * @param int $lv Indentation level, used by the recursion.
       * @return string XML text, '' when nothing has been parsed.
       */
      public function getXML($node=NULL, $lv=-1){
          if ($lv == -1) $node = $this->root;
          if (!$node) return '';

          if ($node['type'] == self::END_TAG) $lv--;
          // Text nodes carry no line number; treat them as "same line".
          $line = isset($node['line']) ? $node['line'] : 0;
          $xml = ($line == 0 || $line == $this->line) ? '' : "\n".str_repeat(' ', max(0, $lv));
          $xml .= $node['tag'];
          $this->line = $line;
          foreach ($node['child'] as $c){
              $xml .= $this->getXML($c, $lv+1);
          }
          return $xml;
      }

      /** @return mixed Value of the "package" attribute of <manifest>, or NULL. */
      public function getPackage(){
          return $this->getAttribute('manifest', 'package');
      }

      /** @return mixed Value of android:versionName of <manifest>, or NULL. */
      public function getVersionName(){
          return $this->getAttribute('manifest', 'android:versionName');
      }

      /** @return mixed Value of android:versionCode of <manifest>, or NULL. */
      public function getVersionCode(){
          return $this->getAttribute('manifest', 'android:versionCode');
      }

      /**
       * @return mixed Value of android:name of <application>, or NULL. Note that
       *               this is the android:name attribute, not android:label.
       */
      public function getAppName(){
          return $this->getAttribute('manifest/application', 'android:name');
      }

      /**
       * Finds the first <activity> that declares the action
       * android.intent.action.MAIN in any of its intent filters.
       *
       * Every activity, every intent filter and every action is inspected, so
       * activities without an intent filter do not stop the search.
       *
       * @return mixed android:name of that activity, or NULL when there is none.
       */
      public function getMainActivity(){
          $app = $this->getElement('manifest/application');
          if (is_null($app)) return NULL;

          foreach ($app['child'] as $activity){
              if (!$this->isTag($activity, 'activity')) continue;
              if ($this->hasMainAction($activity)){
                  return $this->getNodeAttribute($activity, 'android:name');
              }
          }
          return NULL;
      }

      /**
       * @param int $idx Zero-based index among the <activity> elements.
       * @return mixed android:name of that activity, or NULL.
       */
      public function getActivity($idx=0){
          $idx = intval($idx);
          return $this->getAttribute("manifest/application/activity[{$idx}]", 'android:name');
      }

      /**
       * Looks up one attribute of one element.
       *
       * @param string $path Slash separated element names starting at the
       *                     document element; a segment may end in "[n]" to pick
       *                     the n-th (zero-based) element of that name.
       * @param string $name Attribute name including prefix, e.g. android:name.
       * @return mixed Decoded value (raw, not XML-escaped), or NULL when the
       *               element or the attribute does not exist.
       */
      public function getAttribute($path, $name){
          $r = $this->getElement($path);
          if (is_null($r)) return NULL;
          return $this->getNodeAttribute($r, $name);
      }

      //----------------------
      // Private helpers
      //----------------------

      /** Returns the decoded value of attribute $name of $node, or NULL. */
      private function getNodeAttribute($node, $name){
          if (!isset($node['attrs'])) return NULL;
          $name = (string)$name;
          foreach ($node['attrs'] as $a){
              if ($a['ns_name'] === $name) return $this->getAttributeValue($a);
          }
          return NULL;
      }

      /** Tells whether $node is a start-tag node with the given qualified name. */
      private function isTag($node, $name){
          return $node['type'] == self::START_TAG && $node['ns_name'] == $name;
      }

      /** Tells whether an activity node declares android.intent.action.MAIN. */
      private function hasMainAction($activity){
          foreach ($activity['child'] as $filter){
              if (!$this->isTag($filter, 'intent-filter')) continue;
              foreach ($filter['child'] as $action){
                  if ($this->isTag($action, 'action')
                      && $this->getNodeAttribute($action, 'android:name') === 'android.intent.action.MAIN'){
                      return true;
                  }
              }
          }
          return false;
      }

      /** Resolves a path (see getAttribute()) to a node, or NULL. */
      private function getElement($path){
          if (!$this->root) return NULL;
          $r = $this->root;
          foreach (explode('/', $path) as $v){
              $off = 0;
              if (preg_match('/([^\[]+)\[([0-9]+)\]$/', $v, $ms)){
                  $v = $ms[1];
                  $off = (int)$ms[2];
              }
              $found = NULL;
              foreach ($r['child'] as $c){
                  if (!$this->isTag($c, $v)) continue;
                  if ($off == 0){
                      $found = $c;
                      break;
                  }
                  $off--;
              }
              // Element not found
              if (is_null($found)) return NULL;
              $r = $found;
          }
          return $r;
      }

      /**
       * Parses the chunk at the start of the unread data, including - for the
       * file chunk and for start tags - all chunks nested inside it.
       *
       * Namespace handling: $this->ns is a stack of maps (URI string id =>
       * prefix string id). The top map collects the namespaces declared since
       * the last start tag; a start tag prints them as xmlns attributes and
       * pushes a fresh map, an end tag pops that map again.
       *
       * @param int $need Required chunk type, 0 for any.
       * @return array|false Node (keys include type, size, tag, child) or false
       *                     for chunks that produce no node.
       * @throws Exception On malformed or truncated data.
       */
      private function parseBlock($need = 0){
          $o = 0;
          $type = $this->get32($o);
          if ($need && $type != $need) throw new Exception('Block Type Error', 1);
          $size = $this->get32($o);
          if ($size < 8 || $size > $this->length) throw new Exception('Block Size Error', 2);
          $left = $this->length - $size;   // unread bytes expected after this chunk
          $open = false;                   // true while waiting for the matching end tag

          $props = false;
          switch ($type){
          case self::AXML_FILE:
              $props = array(
                  'line' => 0,
                  'tag' => '<?xml version="1.0" encoding="utf-8"?>'
              );
              break;
          case self::STRING_BLOCK:
              $this->stringCount = $this->get32($o);
              $this->styleCount = $this->get32($o);
              $flags = $this->get32($o);
              $strOffset = $this->get32($o);
              $o += 4;   // start of the style data, not needed
              $strListOffset = $this->get32array($o, $this->stringCount);
              $this->stringTab = $this->getStringTab($strOffset, $strListOffset, ($flags & self::UTF8_FLAG) != 0);
              // Style entries are span records, not strings, and nothing in this
              // class uses them, so they are not decoded.
              $this->styleTab = array();
              break;
          case self::RESOURCEIDS:
              $this->resourceIDs = $this->get32array($o, ($size - 8) >> 2);
              break;
          case self::START_NAMESPACE:
              $o += 8;   // line number and comment
              $prefix = $this->get32($o);
              $uri = $this->get32($o);
              $this->ns[count($this->ns) - 1][$uri] = $prefix;
              break;
          case self::END_NAMESPACE:
              $o += 8;   // line number and comment
              $prefix = $this->get32($o);
              $uri = $this->get32($o);
              unset($this->ns[count($this->ns) - 1][$uri]);
              break;
          case self::START_TAG:
              $line = $this->get32($o);
              $o += 4;   // comment
              $ns = $this->getNameSpace($this->get32($o));
              $name = $this->getString($this->get32($o));
              $flag = $this->get32($o);
              $count = $this->get16($o);
              $id = $this->get16($o) - 1;
              $class = $this->get16($o) - 1;
              $style = $this->get16($o) - 1;

              $attrs = array();
              for ($i = 0; $i < $count; $i++){
                  $a = array();
                  $a['ns'] = $this->getNameSpace($this->get32($o));
                  $a['name'] = $this->getString($this->get32($o));
                  $a['val_str'] = $this->get32($o);
                  // The value type is the top byte of this word.
                  $a['val_type'] = $this->get32($o) >> 24;
                  $a['val_data'] = $this->get32($o);
                  $a['ns_name'] = $a['ns'].$a['name'];
                  $attrs[] = $a;
              }

              // Build the textual form of the tag.
              $tag = '<'.$ns.$name;
              foreach ($this->ns[count($this->ns) - 1] as $uriId => $prefixId){
                  $tag .= ' xmlns:'.$this->getString($prefixId).'="'.self::_escape($this->getString($uriId)).'"';
              }
              foreach ($attrs as $a){
                  $tag .= ' '.$a['ns_name'].'="'.self::_escape($this->getAttributeValue($a)).'"';
              }
              $tag .= '>';

              $props = array(
                  'line' => $line,
                  'ns' => $ns,
                  'name' => $name,
                  'flag' => $flag,
                  'count' => $count,
                  'id' => $id,
                  'class' => $class,
                  'style' => $style,
                  'attrs' => $attrs,
                  'ns_name' => $ns.$name,
                  'tag' => $tag
              );

              // Namespaces declared from here on belong to the next start tag.
              $this->ns[] = array();
              $open = true;
              break;
          case self::END_TAG:
              $line = $this->get32($o);
              $o += 4;   // comment
              $ns = $this->getNameSpace($this->get32($o));
              $name = $this->getString($this->get32($o));
              $props = array(
                  'line' => $line,
                  'ns' => $ns,
                  'name' => $name,
                  'ns_name' => $ns.$name,
                  'tag' => '</'.$ns.$name.'>'
              );
              if (count($this->ns) > 1) array_pop($this->ns);
              break;
          case self::TEXT:
              $o += 8;   // line number and comment
              $props = array(
                  'tag' => self::_escape($this->getString($this->get32($o)))
              );
              $o += 8;   // typed value, unused
              break;
          default:
              throw new Exception('Block Type Error', 3);
          }

          if ($type != self::AXML_FILE){
              // The chunk header states how long the chunk is; never read past
              // it and always continue right behind it.
              if ($o > $size) throw new Exception('Block Size Error', 2);
              $o = $size;
          }
          $this->skip($o);

          $child = array();
          while ($open || $this->length > $left){
              $c = $this->parseBlock();
              if ($props && $c) $child[] = $c;
              if ($open && $c && $c['type'] == self::END_TAG){
                  // Nested elements consume their own end tags, so this one
                  // closes the current element.
                  $left = $this->length;
                  break;
              }
          }
          if ($this->length != $left) throw new Exception('Block Overflow Error', 4);

          if (!$props) return false;
          $props['type'] = $type;
          $props['size'] = $size;
          $props['child'] = $child;
          return $props;
      }

      /**
       * Converts a parsed attribute into its printable value.
       *
       * @param array $a Attribute record with val_type, val_data and val_str.
       * @return mixed A string, or a float for TYPE_FLOAT.
       */
      private function getAttributeValue($a){
          $type = $a['val_type'];
          $data = $a['val_data'];
          switch ($type){
          case self::TYPE_STRING:
              return $this->getString($a['val_str']);
          case self::TYPE_ATTRIBUTE:
              return sprintf('?%s%08X', self::_getPackage($data), $data);
          case self::TYPE_REFERENCE:
              return sprintf('@%s%08X', self::_getPackage($data), $data);
          case self::TYPE_INT_HEX:
              return sprintf('0x%08X', $data);
          case self::TYPE_INT_BOOLEAN:
              return ($data != 0 ? 'true' : 'false');
          case self::TYPE_INT_COLOR_ARGB8:
          case self::TYPE_INT_COLOR_RGB8:
          case self::TYPE_INT_COLOR_ARGB4:
          case self::TYPE_INT_COLOR_RGB4:
              return sprintf('#%08X', $data);
          case self::TYPE_DIMENSION:
              return $this->_complexToFloat($data).self::_unit(self::$DIMENSION_UNITS, $data);
          case self::TYPE_FRACTION:
              return $this->_complexToFloat($data).self::_unit(self::$FRACTION_UNITS, $data);
          case self::TYPE_FLOAT:
              return $this->_int2float($data);
          }
          if ($type >= self::TYPE_INT_DEC && $type < self::TYPE_INT_COLOR_ARGB8){
              // Stored as a signed 32-bit integer.
              return (string)self::_toSigned32($data);
          }
          return sprintf('<0x%X, type 0x%02X>', $data, $type);
      }

      /** Picks the unit suffix for a dimension/fraction value ('' if unknown). */
      private static function _unit($units, $data){
          $i = $data & self::UNIT_MASK;
          return isset($units[$i]) ? $units[$i] : '';
      }

      /** Reinterprets an unsigned 32-bit value as a signed one. */
      private static function _toSigned32($v){
          return ($v > 0x7FFFFFFF) ? $v - 4294967296 : $v;
      }

      /** Escapes the characters that are special in XML text and attributes. */
      private static function _escape($s){
          return str_replace(
              array('&', '<', '>', '"'),
              array('&amp;', '&lt;', '&gt;', '&quot;'),
              (string)$s
          );
      }

      /** Decodes the numeric part of a dimension/fraction value. */
      private function _complexToFloat($data){
          // The mantissa is signed, so the sign has to be restored first.
          return (float)(self::_toSigned32($data) & ~0xFF) * self::$RADIX_MULTS[($data>>4) & 3];
      }

      /** Reinterprets the 32 bits of $v as an IEEE 754 single precision float. */
      private function _int2float($v) {
          // 'L' and 'f' both use machine byte order, so the round trip is
          // correct on any platform.
          $r = unpack('f', pack('L', $v));
          return $r[1];
      }

      private static function _getPackage($data){
          return ($data >> 24 == 1) ? 'android:' : '';
      }

      /**
       * Decodes the strings of a string pool.
       *
       * @param int $base Offset of the string data from the chunk start.
       * @param array $list Offset of each string, relative to $base.
       * @param bool $utf8 Whether the pool stores UTF-8 (else UTF-16LE).
       * @return array Strings as UTF-8, indexed from 0.
       * @throws Exception When a string lies outside the available data.
       */
      private function getStringTab($base, $list, $utf8=false){
          $tab = array();
          foreach ($list as $off){
              $off += $base;
              if ($utf8){
                  // Two length prefixes: length in UTF-16 units (unused here),
                  // then length in bytes.
                  $this->getUtf8Length($off);
                  $len = $this->getUtf8Length($off);
                  $this->need($off, $len, 'String Table Overflow', 11);
                  $tab[] = (string)substr($this->xml, $off, $len);
              }else {
                  $len = $this->get16($off);
                  if ($len & 0x8000){
                      // Long string: a second 16-bit word holds the low half.
                      $len = (($len & 0x7FFF) << 16) | $this->get16($off);
                  }
                  $this->need($off, $len * 2, 'String Table Overflow', 11);
                  $tab[] = mb_convert_encoding((string)substr($this->xml, $off, $len * 2), 'UTF-8', 'UTF-16LE');
              }
          }
          return $tab;
      }

      /** Reads a 1- or 2-byte length prefix of a UTF-8 pool string. */
      private function getUtf8Length(&$off){
          $len = $this->get8($off);
          if ($len & 0x80){
              $len = (($len & 0x7F) << 8) | $this->get8($off);
          }
          return $len;
      }

      /** Returns string $id of the string pool, '' for an unknown id. */
      private function getString($id){
          return isset($this->stringTab[$id]) ? $this->stringTab[$id] : '';
      }

      /** Returns "prefix:" for a namespace URI string id, '' when unbound. */
      private function getNameSpace($uri){
          for ($i = count($this->ns) - 1; $i >= 0; $i--){
              $frame = $this->ns[$i];
              if (isset($frame[$uri])){
                  $prefix = $this->getString($frame[$uri]);
                  return ($prefix !== '') ? $prefix.':' : '';
              }
          }
          return '';
      }

      /** Throws unless $bytes bytes are available at offset $off. */
      private function need($off, $bytes, $msg='Unexpected End Of Data', $code=12){
          if ($off < 0 || $bytes < 0 || $off + $bytes > $this->length){
              throw new Exception($msg, $code);
          }
      }

      /** Reads an unsigned byte at $off and advances $off. */
      private function get8(&$off){
          $this->need($off, 1);
          return ord($this->xml[$off++]);
      }

      /** Reads a little-endian 32-bit value at $off and advances $off. */
      private function get32(&$off){
          $this->need($off, 4);
          $int = unpack('V', substr($this->xml, $off, 4));
          $off += 4;
          return $int[1];
      }

      /**
       * Reads $size little-endian 32-bit values and advances $off.
       *
       * @return array Values with keys 1..$size; empty when $size <= 0.
       */
      private function get32array(&$off, $size){
          if ($size <= 0) return array();
          if ($off < 0 || $off + 4 * $size > $this->length) throw new Exception('Array Size Error', 10);
          $arr = unpack('V*', substr($this->xml, $off, 4 * $size));
          if (count($arr) != $size) throw new Exception('Array Size Error', 10);
          $off += 4 * $size;
          return $arr;
      }

      /** Reads a little-endian 16-bit value at $off and advances $off. */
      private function get16(&$off){
          $this->need($off, 2);
          $int = unpack('v', substr($this->xml, $off, 2));
          $off += 2;
          return $int[1];
      }

      /** Drops the first $size bytes of the unread data. */
      private function skip($size){
          $this->xml = (string)substr($this->xml, $size);
          $this->length -= $size;
      }
  }
  421. ?>