.
 *
 * @category  Main
 * @package   RPMPHP
 *
 * @author    Remi Collet
 * @author    Johan Cwiklinski
 * @copyright 2010-2014 Remi Collet
 * @license   http://www.gnu.org/licenses/agpl-3.0-standalone.html AGPL License 3.0 or (at your option) any later version
 * @link      https://git.remirepo.net/cgit/web/rpmphp.git/
 * @since     The beginning of times.
 */
class Parser
{
    /**
     * Display a message, prefixed with the current date (RFC 2822 format).
     *
     * @param string $msg the message to display
     *
     * @return void
     */
    public static function log($msg)
    {
        echo date("r : ") . $msg ."\n";
    }

    /**
     * Parse the Bugzilla ACL list from pkgdb.
     *
     * The file is read as '|' separated values. Lines with fewer than six
     * fields, or whose first field starts with '#', are ignored. The table
     * is truncated before the new content is written.
     *
     * @param TableAcls $acls the table to write to
     * @param string    $url  the file to read from
     *
     * @return integer number of rows written
     */
    public static function readAcls(TableAcls $acls, $url)
    {
        $tot = 0;

        self::log("Read pkgdb/owner");
        $fic = fopen($url, 'r');
        if (!$fic) {
            self::log("ERROR reading '$url'");
            return $tot;
        }

        try {
            $nb = $acls->getCount();
            $acls->truncate();
            self::log("Delete $nb owners");

            while (($line = fgetcsv($fic, 1024, '|')) !== false) {
                if (count($line) <= 5 || substr($line[0], 0, 1) == '#') {
                    continue;
                }
                // Only the first six columns are used.
                list($collection, $name, $summary, $owner, $qa, $cc)
                    = array_map('trim', array_slice($line, 0, 6));

                $input = [
                    'collection' => $collection,
                    'name'       => $name,
                    'summary'    => $summary,
                ];
                if (!empty($owner)) {
                    $input['owner'] = $owner;
                }
                // Each optional field comes from its own column
                // (previously all three were filled from the owner column).
                if (!empty($qa)) {
                    $input['qa'] = $qa;
                }
                if (!empty($cc)) {
                    $input['cc'] = $cc;
                }
                if ($acls->add($input)) {
                    $tot++;
                }
            }
        } finally {
            // Release the handle even if the table layer throws.
            fclose($fic);
        }
        self::log("wrote $tot package's owner");

        return $tot;
    }

    /**
     * Parse the content of a R repository.
     *
     * @param TableUpstream $uptable the table to write to
     * @param array         $repo    the repo to read from
     *                               (keys used: name, state, url)
     *
     * @return integer number of recorded packages
     */
    private static function readOneR(TableUpstream $uptable, array $repo)
    {
        $tot = 0;
        $stable = ($repo["state"] == "stable");

        self::log("Reading " . $repo["name"] . " (" . $repo["state"] . ")");
        $index = @file_get_contents($repo["url"]);
        if (!$index) {
            self::log("Can't read [" . $repo["url"] . "], skip this channel");
            return 0;
        }

        // Only a stable channel replaces the previously stored packages.
        if ($stable) {
            $nb = $uptable->delete(['type' => 'R', 'channel' => $repo['name']]);
            self::log("Delete $nb packages");
        }

        $results = [];
        $pat = '/Package: *(.*)\nVersion: *(.*)\n/i';
        if (!preg_match_all($pat, $index, $results, PREG_SET_ORDER)) {
            self::log("No package in this channel");
            return $tot;
        }

        foreach ($results as $result) {
            $add = $uptable->record(
                "R",
                $repo["name"],
                "R-" . $result[1],
                str_replace('-', '.', $result[2]),
                $stable,
                ($stable ? "" : "devel")
            );
            if ($add) {
                $tot++;
            }
        }
        self::log("Write $tot packages in this channel");

        return $tot;
    }

    /**
     * Parse the content of all R repositories.
     *
     * @param TableUpstream $uptable the table to write to
     * @param TableRRepo    $rrepo   the table to read from
     *
     * @return integer number of recorded packages
     */
    public static function readR(TableUpstream $uptable, TableRRepo $rrepo)
    {
        $tot = 0;
        foreach ($rrepo->request() as $repo) {
            $tot += self::readOneR($uptable, $repo);
        }
        self::log("Write $tot packages in all channels");

        return $tot;
    }

    /**
     * Get the primary metadata of a RPM repository.
     *
     * Reads repodata/repomd.xml, and when the "primary" entry is newer than
     * the stored stamp, downloads it, uncompresses it (plain .xml, .xz, .zst
     * and .gz are handled) and stores the new stamp.
     *
     * @param TableRpmRepo $rpmrepo the table to store timestamp
     * @param array        $row     the repo to read from
     *                              (keys used: id, main, sub, url, stamp)
     *
     * @return SimpleXMLElement|null the parsed primary metadata, null when
     *                               nothing new or nothing usable was found
     */
    private static function readMetadata(TableRpmRepo $rpmrepo, array $row)
    {
        self::log("REPOSITORY " . $row['main'] . " " . $row['sub']);

        $TimRemote = 0;
        $UrlRemote = '';
        $repomd = @simplexml_load_file($row['url'] . "repodata/repomd.xml");
        if ($repomd) {
            foreach ($repomd->data as $data) {
                if ((string)$data->attributes() === "primary") {
                    // Keep a plain integer rather than an XML node, it is
                    // compared, logged and stored below.
                    $TimRemote = (int)$data->timestamp;
                    $UrlRemote = $row['url'] . $data->location->attributes();
                }
            }
        }

        if (!$TimRemote) {
            self::log("Can't read " . $row['url']);
            return null;
        }
        if (!($TimRemote > $row['stamp'])) {
            self::log("no update needed : $TimRemote / " . $row['stamp']);
            return null;
        }

        self::log("Loading $UrlRemote");
        $txt = file_get_contents($UrlRemote);
        if (!$txt) {
            // Stop here: reporting an uncompress failure as well would be
            // misleading, nothing was downloaded.
            self::log("ERROR : can't read $UrlRemote");
            return null;
        }

        if (str_ends_with($UrlRemote, '.xml')) {
            // Not compressed, nothing to do
        } elseif (str_ends_with($UrlRemote, '.xz')) {
            if (!function_exists('xzdecode')) {
                self::log("ERROR : missing xz php extension");
                return null;
            }
            $txt = xzdecode($txt);
        } elseif (str_ends_with($UrlRemote, '.zst')) {
            if (!function_exists('zstd_uncompress')) {
                self::log("ERROR : missing zstd php extension");
                return null;
            }
            $txt = zstd_uncompress($txt);
        } elseif (str_ends_with($UrlRemote, '.gz')) {
            $txt = gzdecode($txt);
        } else {
            self::log("ERROR : unkown compression");
            return null;
        }

        if (!$txt) {
            self::log("ERROR : can't uncompress $UrlRemote");
            return null;
        }

        $primary = simplexml_load_string($txt);
        if (!$primary) {
            self::log("ERROR : can't parse $UrlRemote");
            return null;
        }

        self::log("Read " . $primary->attributes() . " packages from primary");
        $rpmrepo->update($row['id'], ['stamp' => $TimRemote]);

        return $primary;
    }

    /**
     * Parse the content of one RPM repository.
     *
     * When new metadata are available, the packages previously stored for
     * this repository are deleted and replaced.
     *
     * @param TableRpm     $rpmtable the table to write to
     * @param TableRpmRepo $rpmrepo  the table to store timestamp
     * @param array        $row      the repo to read from
     *
     * @return integer number of written packages
     */
    private static function readOneRpm(TableRpm $rpmtable, TableRpmRepo $rpmrepo, array $row)
    {
        $tot = 0;

        $primary = self::readMetadata($rpmrepo, $row);
        if (!$primary) {
            return $tot;
        }

        $crit = [
            'repo_main' => $row['main'],
            'repo_sub'  => $row['sub'],
        ];
        $nb = $rpmtable->delete($crit);
        self::log("Delete $nb packages");

        foreach ($primary->package as $package) {
            if ((string)$package->attributes() !== 'rpm') {
                continue;
            }
            $ver = $package->version->attributes();
            $loc = $package->location->attributes();

            $input = [
                'repo_main' => $row['main'],
                'repo_sub'  => $row['sub'],
                'name'      => $package->name,
                'epoch'     => $ver['epoch'],
                'ver'       => $ver['ver'],
                'rel'       => $ver['rel'],
                'summary'   => $package->summary,
                'location'  => $loc['href'],
                'url'       => $package->url,
            ];
            if ($rpmtable->add($input)) {
                $tot++;
            }
        }
        self::log("Write $tot packages");

        return $tot;
    }

    /**
     * Parse the content of all RPM repositories.
     *
     * @param TableRpm     $rpmtable the table to write to
     * @param TableRpmRepo $rpmrepo  the table to read from
     * @param array        $crit     array for repo selection
     *
     * @return integer number of written packages
     */
    public static function readRpm(TableRpm $rpmtable, TableRpmRepo $rpmrepo, array $crit)
    {
        $tot = 0;
        foreach ($rpmrepo->request($crit) as $row) {
            $tot += self::readOneRpm($rpmtable, $rpmrepo, $row);
        }

        return $tot;
    }

    /**
     * Parse the content of one RPM repository for Provides.
     *
     * For each binary package, looks for "php-composer(vendor/project)" or
     * "php-pie(vendor/project)" provides. The name found is kept only for
     * the main package (the one named like its source rpm).
     *
     * @param TableRpmRepo $rpmrepo the table to store timestamp
     * @param array        $row     the repo to read from
     * @param array        $result  found packagist packages, indexed by
     *                              source rpm name (filled by reference)
     *
     * @return integer number of names stored in $result by this call
     */
    private static function readOneProv(TableRpmRepo $rpmrepo, array $row, array &$result)
    {
        $tot = 0;
        $excl = ['udan11/sql-parser'];

        $primary = self::readMetadata($rpmrepo, $row);
        if (!$primary) {
            return $tot;
        }

        foreach ($primary->package as $package) {
            if ((string)$package->attributes() !== 'rpm') {
                continue;
            }
            $srpm = false;
            $composer = false;

            foreach ($package->format->children('rpm', true) as $fmt) {
                // get the source rpm name
                if ($fmt->getName() == 'sourcerpm') {
                    if (preg_match('/^(.*)-([^-]*)-([^-]*)\.src\.rpm$/', (string)$fmt, $reg)) {
                        $srpm = $reg[1];
                    }
                }
                if ($fmt->getName() != 'provides') {
                    continue;
                }

                // Parse the provides
                foreach ($fmt as $fmt2) {
                    $prov = (string)$fmt2->attributes()['name'];
                    if (!preg_match('/^php-(composer|pie)\((.*)\)$/', $prov, $reg)) {
                        continue;
                    }
                    // $reg[1] is the kind (composer or pie), $reg[2] the name
                    $name = $reg[2];
                    if (!strpos($name, '/')) {
                        // no vendor part (no slash, or a leading one)
                        continue;
                    }
                    list($vend) = explode('/', $name, 2);

                    if ($vend == 'zendframework') {
                        if (!$composer) {
                            // only if empty to keep laminas
                            $composer = $name;
                        }
                    } elseif (!str_starts_with($name, 'ext-')
                        && !str_ends_with($name, '-implementation')
                        && !in_array($name, $excl, true)
                    ) {
                        // The filters apply to the package name, testing
                        // the kind made them always pass.
                        $composer = $name;
                    }
                }
            }

            // Save composer name only for main package
            if ($composer && $srpm && ((string)$package->name === $srpm)) {
                $result[$srpm] = $composer;
                $tot++;
            }
        }
        self::log("Read $tot packages for provides");

        return $tot;
    }

    /**
     * Parse the content of all RPM repositories for Provides.
     *
     * The packagist table is only truncated and rewritten when at least one
     * name was found, so an empty run keeps the previous content.
     *
     * @param TablePackagist $pkgtable the table to write to
     * @param TableRpmRepo   $rpmrepo  the table to read from
     * @param array          $crit     array for repo selection
     *
     * @return integer number of written rows
     */
    public static function readProvides(TablePackagist $pkgtable, TableRpmRepo $rpmrepo, array $crit)
    {
        $tot = 0;
        $result = [];

        foreach ($rpmrepo->request($crit) as $row) {
            self::readOneProv($rpmrepo, $row, $result);
        }

        if (count($result)) {
            $pkgtable->truncate();
            foreach ($result as $rpm => $pkg) {
                $input = [
                    'rpmname' => $rpm,
                    'pkgname' => $pkg,
                ];
                try {
                    if ($pkgtable->add($input)) {
                        $tot++;
                    }
                } catch (Exception $e) {
                    // ignore duplicate key for now
                    // ex pimple/pimple provided by php-pimple1 and php-pimple
                }
            }
        }
        self::log("Write $tot packagist packages");

        return $tot;
    }

    /**
     * Parse the PECL channel.
     *
     * PECL is read through the same REST interface as any PEAR channel.
     *
     * @param TableUpstream $uptable the table to write to
     * @param string        $url     the URL to read from
     *
     * @return integer number of packages read
     */
    public static function readPecl(TableUpstream $uptable, $url)
    {
        return self::readOnePear($uptable, 'pecl', $url);
    }

    /**
     * Parse the content of all Packagist repository.
     *
     * All the 'composer' entries are deleted first, then each known package
     * is requested and its stable and unstable versions are recorded.
     *
     * @param TableUpstream  $uptable the table to write to
     * @param TablePackagist $pktable the table to read from
     *
     * @return integer number of packages with at least one recorded version
     */
    public static function readPackagist(TableUpstream $uptable, TablePackagist $pktable)
    {
        self::log("Packagist search releases");
        $pk = new PackagistClient();

        $nb = $uptable->delete(['type' => 'composer']);
        self::log("Delete $nb packages");

        $tot = 0;
        foreach ($pktable->request(['ORDER' => 'rpmname']) as $rec) {
            $rep = $pk->getPackage($rec['pkgname']);
            if (!$rep) {
                continue;
            }

            $parts = explode('/', $rec['pkgname']);
            $vendor = match (count($parts)) {
                3       => $parts[0] . '/' . $parts[1],
                2       => "packagist/" . $parts[0],
                default => "packagist",
            };

            // Count the package when any of its versions was recorded, a
            // failed unstable record must not hide a successful stable one.
            $recorded = false;
            if ($rep['stable']) {
                $id = $uptable->record(
                    'composer',
                    $vendor,
                    $rec['rpmname'],
                    $rep['stable'],
                    true
                );
                $recorded = (bool)$id;
            }
            if ($rep['unstable']) {
                $id = $uptable->record(
                    'composer',
                    $vendor,
                    $rec['rpmname'],
                    $rep['unstable'],
                    false,
                    $rep['state']
                );
                $recorded = $recorded || (bool)$id;
            }
            if ($recorded) {
                $tot++;
            }
        }
        self::log("Write $tot packages");

        return $tot;
    }

    /**
     * Record the first listed release, and the first stable one, of a package.
     *
     * Both candidate rpm names are recorded, in the same order for each
     * version: first listed release for both names, then first stable
     * release for both names.
     *
     * @param TableUpstream    $uptable     the table to write to
     * @param string           $type        'pecl' or 'pear'
     * @param string           $channelname the channel name
     * @param string           $rpmname1    first candidate rpm name
     * @param string           $rpmname2    second candidate rpm name
     * @param SimpleXMLElement $releases    the release nodes, each with a
     *                                      version (v) and a state (s)
     *
     * @return void
     */
    private static function recordPearReleases(
        TableUpstream $uptable,
        $type,
        $channelname,
        $rpmname1,
        $rpmname2,
        $releases
    ) {
        $rpmnames = [$rpmname1, $rpmname2];

        $latest = $releases[0];
        foreach ($rpmnames as $rpmname) {
            $uptable->record(
                $type,
                $channelname,
                $rpmname,
                (string)$latest->v,
                false,
                (string)$latest->s
            );
        }

        foreach ($releases as $rev) {
            if ($rev->s == 'stable') {
                foreach ($rpmnames as $rpmname) {
                    $uptable->record(
                        $type,
                        $channelname,
                        $rpmname,
                        (string)$rev->v,
                        true
                    );
                }
                break;
            }
        }
    }

    /**
     * Parse the content of one PEAR channel.
     *
     * Reads channel.xml to find the REST base URL, deletes the packages
     * stored for this channel, then reads the releases either category by
     * category, or from the global packages list when the channel exposes
     * no category.
     *
     * @param TableUpstream $uptable     the table to write to
     * @param string        $channelname the channel name
     * @param string        $channelurl  the channel URL (host, without scheme)
     *
     * @return integer number of packages read
     */
    public static function readOnePear(TableUpstream $uptable, $channelname, $channelurl)
    {
        $type = ($channelname == 'pecl' ? 'pecl' : 'pear');

        $channel = @simplexml_load_file("http://$channelurl/channel.xml");
        if (!$channel) {
            self::log("can't read PEAR site (channel of $channelname)");
            return 0;
        }
        $rest = (string)$channel->servers->primary->rest->baseurl[0];
        self::log("PEAR reading channel=$channelname, baseurl = $rest");

        $categories = @simplexml_load_file($rest."c/categories.xml");
        if (!$categories) {
            self::log("can't read PEAR site (categories)");
            return 0;
        }

        $crit = ['type' => $type, 'channel' => $channelname];
        $nb = $uptable->delete($crit);
        self::log("Delete $nb packages");

        $nb = 0;
        if (!isset($categories->c[0])) {
            self::log("Reading ALL");  // ezc only
            $pitxt = @file_get_contents($rest."p/packages.xml");
            if (!$pitxt) {
                self::log("can't read PEAR site (".$rest."p/packages.xml)");
                return 0;
            }
            $allpi = @simplexml_load_string($pitxt);
            if (!$allpi) {
                self::log("can't parse PEAR site (".$rest."p/packages.xml)");
                return 0;
            }

            foreach ($allpi->p as $p) {
                $name = (string)$p;
                $relurl = $rest."r/".strtolower($name)."/allreleases.xml";

                $pitxt = @file_get_contents($relurl);
                if (!$pitxt) {
                    self::log(
                        "can't read PEAR site (".$rest."r/".
                        strtolower($name)."/allreleases.xml"
                    );
                    continue;
                }
                $pi = @simplexml_load_string($pitxt);
                if (!$pi || !isset($pi->r[0])) {
                    // Unparsable answer or no release: nothing to record
                    self::log("can't parse PEAR site (".$relurl.")");
                    continue;
                }

                self::recordPearReleases(
                    $uptable,
                    $type,
                    $channelname,
                    "php-".$channelname."-".str_replace("_", "-", $name),
                    "php-".$channelname."-".$name,
                    $pi->r
                );
                $nb++;
            }
        } else {
            foreach ($categories->c as $cat) {
                self::log("Reading $cat");
                $pitxt = @file_get_contents(
                    $rest."c/".urlencode((string)$cat)."/packagesinfo.xml"
                );
                if (!$pitxt) {
                    self::log(
                        "can't read PEAR site (".$rest."c/".
                        urlencode((string)$cat)."/packagesinfo.xml)"
                    );
                    continue;
                }

                // NOTE(review): the statement below is truncated in the file
                // as received: the function name and the beginning of its
                // first (pattern) argument are missing before "/U". It has
                // the shape of a three-argument replace call, but the pattern
                // cannot be recovered from here. It is reproduced verbatim,
                // restore it from version control; the file does not parse
                // until this is done.
                $pitxt = "/U", "", str_replace("\r\n", "\n", substr($pitxt, 2)) );
                $pitxt = str_replace("\xA0", "", $pitxt);

                $pi = @simplexml_load_string($pitxt);
                if (!$pi) {
                    self::log("can't read response ($cat)");
                    continue;
                }

                foreach ($pi->pi as $ps) {
                    if (!isset($ps->p->n) || !isset($ps->a->r)) {
                        continue;
                    }
                    $name = (string)$ps->p->n;

                    // A few packages do not follow the generic naming
                    $rpmname1 = match (true) {
                        $channelname == 'phing' && $name == 'phing'
                            => "php-pear-phing",
                        $channelname == 'phpunit' && $name == 'PHPUnit'
                            => "php-pear-PHPUnit",
                        $channelname == 'pecl' && $name == 'pecl_http'
                            => "php-pecl-http",
                        default
                            => "php-".$channelname."-".str_replace("_", "-", $name),
                    };

                    self::recordPearReleases(
                        $uptable,
                        $type,
                        $channelname,
                        $rpmname1,
                        "php-".$channelname."-".$name,
                        $ps->a->r
                    );
                    $nb++;
                }
            }
        }
        self::log("read $nb packages in $channelname");

        return $nb;
    }

    /**
     * Parse the content of all PEAR channels.
     *
     * @param TableUpstream $uptable the table to write to
     * @param TablePearRepo $pear    the table to read from
     *
     * @return integer number of packages read
     */
    public static function readPear(TableUpstream $uptable, TablePearRepo $pear)
    {
        $tot = 0;

        self::log("PEAR reading channels");
        $channels = $pear->getAllRepo(true);

        foreach ($channels as $channelname => $channelurl) {
            $tot += self::readOnePear($uptable, $channelname, $channelurl);
        }
        self::log("Write $tot packages in all channels");

        return $tot;
    }
}
?>