From 64fb7ac1ff4fffe6c29e8eaa38967acc454a18ae Mon Sep 17 00:00:00 2001
From: Remi Collet <fedora@famillecollet.com>
Date: Sat, 26 Jul 2014 19:39:57 +0200
Subject: add parser to retrieve packagist package name from binary repository

---
 class/Parser.php        | 183 +++++++++++++++++++++++++++++++++++++-----------
 class/TableIterator.php |   5 ++
 2 files changed, 147 insertions(+), 41 deletions(-)

(limited to 'class')

diff --git a/class/Parser.php b/class/Parser.php
index 98fd997..fff7fb0 100644
--- a/class/Parser.php
+++ b/class/Parser.php
@@ -165,18 +165,15 @@ class Parser
     }
 
     /**
-     * Parse the content of all RPM repository
+     * Get the metadata of a RPM repository
      *
-     * @param TableRpm      $rpmtable   the table to write to
      * @param TableRpmRepo  $rpmrepo    the table to store timestamp
      * @param hastable      $row        the repo to read from
      *
-     * @return integer number of parsed line
+     * @return SimpleXMLElement|NULL the parsed primary metadata, NULL otherwise
      */
-    static public function readOneRpm(TableRpm $rpmtable, TableRpmRepo $rpmrepo, Array $row)
+    static private function readMetadata(TableRpmRepo $rpmrepo, Array $row)
     {
-        $tot = 0;
-
         self::log("REPOSITORY " . $row['main'] . " " . $row['sub']);
         $TimRemote = 0;
         $repomd = @simplexml_load_file($row['url'] . "repodata/repomd.xml");
@@ -194,7 +191,6 @@ class Parser
         } else if ($TimRemote > $row['stamp']) {
             self::log("Loading $UrlRemote");
 
-            //$fic=gzopen("primary.xml.gz", "r");
             $fic=gzopen($UrlRemote, "r");
             if ($fic) {
                 $txt="";
@@ -205,49 +201,67 @@ class Parser
                 gzclose($fic);
 
                 $primary = simplexml_load_string($txt);
-                self::log("Read " . $primary->attributes() . " packages");
-                unset($txt);
+                if ($primary) {
+                    self::log("Read " . $primary->attributes() . " packages");
+                    $rpmrepo->update($row['id'], array('stamp' =>$TimRemote));
 
-                $crit = array(
-                    'repo_main' => $row['main'],
-                    'repo_sub'  => $row['sub']
-                );
-                $nb = $rpmtable->delete($crit);
-                self::log("Delete $nb packages");
-
-                foreach ($primary->package as $package) {
-                    if ($package->attributes()=='rpm') {
-                        $ver = $package->version->attributes();
-                        $loc = $package->location->attributes();
-
-                        $input = array(
-                            'repo_main' => $row['main'],
-                            'repo_sub'  => $row['sub'],
-                            'name'      => $package->name,
-                            'epoch'     => $ver['epoch'],
-                            'ver'       => $ver['ver'],
-                            'rel'       => $ver['rel'],
-                            'summary'   => $package->summary,
-                            'location'  => $loc['href'],
-                            'url'       => $package->url
-                        );
-                        if ($rpmtable->add($input)) {
-                            $tot++;
-                        }
-                    }
+                    return $primary;
+                } else {
+                    self::log("ERROR : can't parse $UrlRemote");
                 }
-                self::log("Write $tot packages");
-
-                $rpmrepo->update($row['id'], array('stamp' =>$TimRemote));
-
-                unset($primary);
             } else {
                 self::log("ERROR : can't read $UrlRemote");
             }
         } else {
             self::log("no update needed : $TimRemote / " . $row['stamp']);
         }
+        return NULL;
+    }
+    /**
+     * Parse the content of one RPM repository and store its packages
+     *
+     * @param TableRpm      $rpmtable   the table to write to
+     * @param TableRpmRepo  $rpmrepo    the table to store timestamp
+     * @param hashtable     $row        the repo to read from
+     *
+     * @return integer number of packages successfully added to $rpmtable
+     */
+    static private function readOneRpm(TableRpm $rpmtable, TableRpmRepo $rpmrepo, Array $row)
+    {
+        $tot = 0;
+
+        $primary = self::readMetadata($rpmrepo, $row); // NULL when nothing was loaded
+        if ($primary) {
+            $crit = array(
+                'repo_main' => $row['main'],
+                'repo_sub'  => $row['sub']
+            );
+            $nb = $rpmtable->delete($crit); // existing rows of this repo are removed first
+            self::log("Delete $nb packages");
 
+            foreach ($primary->package as $package) {
+                if ($package->attributes()=='rpm') {
+                    $ver = $package->version->attributes();
+                    $loc = $package->location->attributes();
+
+                    $input = array(
+                        'repo_main' => $row['main'],
+                        'repo_sub'  => $row['sub'],
+                        'name'      => $package->name,
+                        'epoch'     => $ver['epoch'],
+                        'ver'       => $ver['ver'],
+                        'rel'       => $ver['rel'],
+                        'summary'   => $package->summary,
+                        'location'  => $loc['href'],
+                        'url'       => $package->url
+                    );
+                    if ($rpmtable->add($input)) {
+                        $tot++;
+                    }
+                }
+            }
+            self::log("Write $tot packages");
+        }
         return $tot;
     }
 
@@ -270,6 +284,93 @@ class Parser
         return $tot;
     }
 
+    /**
+     * Parse the content of one RPM repository for Provides
+     *
+     * Each binary package providing "php-composer(NAME)" whose name is
+     * also the name of its source rpm is recorded as $result[rpm] = NAME.
+     *
+     * @param TableRpmRepo  $rpmrepo    the table to store timestamp
+     * @param hashtable     $row        the repo to read from
+     * @param hashtable     $result     found packagist packages
+     *
+     * @return integer number of packagist names recorded in $result
+     */
+    static private function readOneProv(TableRpmRepo $rpmrepo, Array $row, Array &$result)
+    {
+        $tot = 0;
+
+        $primary = self::readMetadata($rpmrepo, $row);
+        if ($primary) {
+            foreach ($primary->package as $package) {
+                if ($package->attributes()=='rpm') {
+                    $srpm = false;
+                    $composer = false;
+                    foreach ($package->format->children('rpm', true) as $fmt) {
+                        // get the source rpm name
+                        // (strip the trailing -version-release.src.rpm)
+                        if ($fmt->getName() == 'sourcerpm') {
+                            if (preg_match('/^(.*)-([^-]*)-([^-]*)\.src\.rpm$/', (string)$fmt, $reg)) {
+                                $srpm = $reg[1];
+                            }
+                        }
+                        if ($fmt->getName() != 'provides') {
+                            continue;
+                        }
+                        // Parse the provides
+                        foreach ($fmt as $fmt2) {
+                            $prov = (string)$fmt2->attributes()['name'];
+                            if (preg_match('/^php-composer\((.*)\)$/', $prov, $reg)) {
+                                $composer = $reg[1];
+                            }
+                        }
+                    }
+                    // Save composer name only for main package
+                    if ($composer && $srpm && ((string)$package->name === $srpm)) {
+                        $result[$srpm] = $composer;
+                        $tot++;
+                    }
+                }
+            }
+            self::log("Read $tot packages");
+        }
+        return $tot;
+    }
+
+    /**
+     * Parse the content of all RPM repository for Provides
+     *
+     * @param TablePackagist $pkgtable  the table to write to
+     * @param TableRpmRepo   $rpmrepo   the table to read from
+     * @param Array          $crit      array for repo selection
+     *
+     * @return integer number of rows successfully added to $pkgtable
+     */
+    static public function readProvides(TablePackagist $pkgtable, TableRpmRepo $rpmrepo, Array $crit)
+    {
+        $tot = 0;
+        $result = array();
+
+        foreach ($rpmrepo->request($crit) as $row) {
+            self::readOneProv($rpmrepo, $row, $result); // fills $result by reference
+        }
+        if (count($result)) {
+            $pkgtable->truncate(); // only emptied when at least one name was found
+            foreach($result as $rpm => $pkg) {
+                $input = array(
+                    'rpmname' => $rpm,
+                    'pkgname' => $pkg,
+                );
+                if ($pkgtable->add($input)) {
+                    $tot++;
+                }
+            }
+        }
+        self::log("Write $tot packagist packages");
+
+        return $tot;
+    }
+
     /**
      * Parse the PECL webservices
      *
diff --git a/class/TableIterator.php b/class/TableIterator.php
index 831e845..4a4c1b6 100644
--- a/class/TableIterator.php
+++ b/class/TableIterator.php
@@ -212,6 +212,11 @@ class TableIterator  implements Iterator
             } else if (is_numeric($value)) {
                 // Integer
                 $ret .= "$name=$value";
+            } else if (($value[0]=='>' || $value[0]=='<')
+                       && is_numeric(substr($value,1))) {
+                // > integer
+                $ret .= "$name $value";
+
             } else if (strpos($value,'%')===false){
                 // String
                 $ret .= "$name='$value'";
-- 
cgit