Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions src/apify/_actor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1162,7 +1162,7 @@ async def metamorph(
custom_after_sleep: How long to sleep for after the metamorph, to wait for the container to be stopped.
"""
if not self.is_at_home():
self.log.error('Actor.metamorph() is only supported when running on the Apify platform.')
self.log.warning('Actor.metamorph() is only supported when running on the Apify platform.')
return

if custom_after_sleep is None:
Expand All @@ -1172,6 +1172,7 @@ async def metamorph(
if not self.configuration.actor_run_id:
raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.')

self.log.debug(f'Metamorphing the Actor run into target Actor {target_actor_id!r}.')
await self.apify_client.run(self.configuration.actor_run_id).metamorph(
target_actor_id=target_actor_id,
run_input=run_input,
Expand All @@ -1198,7 +1199,7 @@ async def reboot(
custom_after_sleep: How long to sleep for after the reboot, to wait for the container to be stopped.
"""
if not self.is_at_home():
self.log.error('Actor.reboot() is only supported when running on the Apify platform.')
self.log.warning('Actor.reboot() is only supported when running on the Apify platform.')
return

if self._is_rebooting:
Expand Down Expand Up @@ -1243,6 +1244,7 @@ async def safe_dispatch(listener: Any, data: Any) -> None:
except TimeoutError:
self.log.warning('Pre-reboot event listeners did not finish within timeout; proceeding with reboot')

self.log.debug('Rebooting the Actor run.')
await self.apify_client.run(self.configuration.actor_run_id).reboot()
except BaseException:
# Reset the flag so that a failed or cancelled reboot can be retried.
Expand Down Expand Up @@ -1277,7 +1279,7 @@ async def add_webhook(self, webhook: Webhook, *, idempotency_key: str | None = N
)

if not self.is_at_home():
self.log.error('Actor.add_webhook() is only supported when running on the Apify platform.')
self.log.warning('Actor.add_webhook() is only supported when running on the Apify platform.')
return

# If is_at_home() is True, config.actor_run_id is always set
Expand Down Expand Up @@ -1454,9 +1456,9 @@ def _get_remaining_time(self) -> timedelta | None:
return max(self.configuration.timeout_at - datetime.now(tz=UTC), timedelta(0))

self.log.warning(
'Using `inherit` argument is only possible when the Actor'
' is running on the Apify platform and when the timeout for the Actor run is set. '
f'{self.is_at_home()=}, {self.configuration.timeout_at=}'
'Using the `inherit` argument is only possible when the Actor is running on the Apify platform and '
'the timeout for the Actor run is set.',
extra={'is_at_home': self.is_at_home(), 'timeout_at': self.configuration.timeout_at},
)
return None

Expand Down
5 changes: 4 additions & 1 deletion src/apify/_charging.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from dataclasses import dataclass
from datetime import UTC, datetime
from decimal import Decimal
from logging import getLogger
from typing import TYPE_CHECKING, Literal, Protocol, TypedDict

from pydantic import ConfigDict
Expand All @@ -19,7 +20,6 @@
from apify_client._models import PricingPerEvent as ClientPricingPerEvent

from apify._utils import ReentrantLock, docs_group, ensure_context
from apify.log import logger
from apify.storages import Dataset

if TYPE_CHECKING:
Expand All @@ -29,6 +29,8 @@

from apify._configuration import Configuration

logger = getLogger(__name__)

charging_manager_ctx: ContextVar[ChargingManager | None] = ContextVar('charging_manager_ctx', default=None)
"""Holds the current `ChargingManager` instance, if any.

Expand Down Expand Up @@ -450,6 +452,7 @@ async def charge(self, event_name: str, *, count: int = 1) -> ChargeResult:
pass
elif event_name in self._pricing_info:
await self._client.run(self._actor_run_id).charge(event_name, count=charged_count)
logger.debug(f"Charged {charged_count} occurrence(s) of event '{event_name}'.")
elif event_name in self._tier_priced_events:
logger.warning(
f"Event '{event_name}' is tier-priced and is not chargeable via the pay-per-event API."
Expand Down
4 changes: 3 additions & 1 deletion src/apify/_proxy_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import json
import re
from dataclasses import dataclass, field
from logging import getLogger
from re import Pattern
from typing import TYPE_CHECKING, Any

Expand All @@ -22,13 +23,14 @@
from apify._configuration import Configuration
from apify._consts import ApifyEnvVars
from apify._utils import docs_group
from apify.log import logger

if TYPE_CHECKING:
from apify_client import ApifyClientAsync

from apify import Request

logger = getLogger(__name__)

APIFY_PROXY_VALUE_REGEX = re.compile(r'^[\w._~]+$')
COUNTRY_CODE_REGEX = re.compile(r'^[A-Z]{2}$')
# ISO 3166-2 subdivision codes are 1-3 uppercase alphanumeric characters, e.g. 'CA', 'NSW', '9' (Wien, AT-9)
Expand Down
6 changes: 4 additions & 2 deletions src/apify/events/_apify_event_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import asyncio
import contextlib
import time
from logging import getLogger
from typing import TYPE_CHECKING, Annotated, Self

import websockets.asyncio.client
Expand All @@ -17,7 +18,6 @@

from apify._utils import docs_group
from apify.events._types import DeprecatedEvent, EventMessage, SystemInfoEventData, UnknownEvent
from apify.log import logger

if TYPE_CHECKING:
from collections.abc import Generator
Expand All @@ -28,6 +28,8 @@
from apify._configuration import Configuration


logger = getLogger(__name__)

event_data_adapter = TypeAdapter[EventMessage | DeprecatedEvent | UnknownEvent](
Annotated[EventMessage, Discriminator('name')] | DeprecatedEvent | UnknownEvent
)
Expand Down Expand Up @@ -195,7 +197,7 @@ async def _handle_platform_message(self, message: str | bytes) -> None:
return

if isinstance(parsed_message, UnknownEvent):
logger.info(
logger.debug(
f'Unknown message received: event_name={parsed_message.name}, event_data={parsed_message.data}'
)
return
Expand Down
7 changes: 0 additions & 7 deletions src/apify/scrapy/middlewares/apify_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ async def process_request(self, request: Request) -> None:
Raises:
ValueError: If username and password are not provided in the proxy URL.
"""
Actor.log.debug(f'ApifyHttpProxyMiddleware.process_request: request={request}')
url = await self._get_new_proxy_url()

if not (url.username and url.password):
Expand All @@ -82,8 +81,6 @@ async def process_request(self, request: Request) -> None:
basic_auth_header = get_basic_auth_header(url.username, url.password)
request.headers[b'Proxy-Authorization'] = basic_auth_header

Actor.log.debug(f'ApifyHttpProxyMiddleware.process_request: updated request.meta={request.meta}')

def process_exception(
self,
request: Request,
Expand All @@ -100,10 +97,6 @@ def process_exception(
process_exception() methods of installed middleware, until no middleware is left and the default
exception handling kicks in.
"""
Actor.log.debug(
f'ApifyHttpProxyMiddleware.process_exception: request={request}, exception={exception}',
)

if isinstance(exception, TunnelError):
Actor.log.warning(
f'ApifyHttpProxyMiddleware: TunnelError occurred for request="{request}", '
Expand Down
4 changes: 0 additions & 4 deletions src/apify/scrapy/pipelines/actor_dataset_push.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

from logging import getLogger
from typing import TYPE_CHECKING

from itemadapter.adapter import ItemAdapter
Expand All @@ -10,8 +9,6 @@
if TYPE_CHECKING:
from scrapy import Item

logger = getLogger(__name__)


class ActorDatasetPushPipeline:
"""A Scrapy pipeline for pushing items to an Actor's default dataset.
Expand All @@ -25,6 +22,5 @@ async def process_item(
) -> Item:
"""Pushes the provided Scrapy item to the Actor's default dataset."""
item_dict = ItemAdapter(item).asdict()
logger.debug(f'Pushing item={item_dict} to the dataset.')
await Actor.push_data(item_dict)
return item
12 changes: 0 additions & 12 deletions src/apify/scrapy/requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,6 @@ def to_apify_request(scrapy_request: ScrapyRequest, spider: Spider) -> ApifyRequ
logger.warning('Failed to convert to Apify request: Scrapy request must be a ScrapyRequest instance.')
return None

logger.debug(f'to_apify_request was called (scrapy_request={scrapy_request})...')

# Configuration to behave as similarly as possible to Scrapy's default RFPDupeFilter.
#
# The body is stored twice on purpose: as `payload` (used for the extended unique key) and inside the serialized
Expand Down Expand Up @@ -150,7 +148,6 @@ def to_apify_request(scrapy_request: ScrapyRequest, spider: Spider) -> ApifyRequ
# a second base64 layer would only add ~33% overhead on the enqueue path.
apify_request.user_data['scrapy_request'] = scrapy_request_json

logger.debug(f'scrapy_request was converted to the apify_request={apify_request}')
return apify_request


Expand All @@ -171,14 +168,10 @@ def to_scrapy_request(apify_request: ApifyRequest, spider: Spider) -> ScrapyRequ
if not isinstance(cast('Any', apify_request), ApifyRequest):
raise TypeError('apify_request must be an apify.Request instance')

logger.debug(f'to_scrapy_request was called (apify_request={apify_request})...')

# If the apify_request comes from the Scrapy
if 'scrapy_request' in apify_request.user_data:
# Deserialize the Scrapy ScrapyRequest from the apify_request by parsing the stored JSON and reconstructing
# the Scrapy ScrapyRequest object from its dictionary representation.
logger.debug('Restoring the Scrapy ScrapyRequest from the apify_request...')

scrapy_request_json = apify_request.user_data['scrapy_request']
if not isinstance(scrapy_request_json, str):
raise TypeError('the stored scrapy_request must be a string')
Expand All @@ -194,8 +187,6 @@ def to_scrapy_request(apify_request: ApifyRequest, spider: Spider) -> ScrapyRequ
if not isinstance(scrapy_request, ScrapyRequest):
raise TypeError('scrapy_request must be an instance of the ScrapyRequest class')

logger.debug(f'Scrapy ScrapyRequest successfully reconstructed (scrapy_request={scrapy_request})...')

# Update the meta field with the meta field from the apify_request
meta = scrapy_request.meta or {}
meta.update({'apify_request_unique_key': apify_request.unique_key})
Expand All @@ -204,8 +195,6 @@ def to_scrapy_request(apify_request: ApifyRequest, spider: Spider) -> ScrapyRequ

# If the apify_request comes directly from the Scrapy, typically start URLs.
else:
logger.debug('Gonna create a new Scrapy ScrapyRequest (cannot be restored)')

scrapy_request = ScrapyRequest(
url=apify_request.url,
method=apify_request.method,
Expand All @@ -222,5 +211,4 @@ def to_scrapy_request(apify_request: ApifyRequest, spider: Spider) -> ScrapyRequ
if apify_request.user_data:
scrapy_request.meta['userData'] = apify_request.user_data

logger.debug(f'an apify_request was converted to the scrapy_request={scrapy_request}')
return scrapy_request
9 changes: 1 addition & 8 deletions src/apify/scrapy/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,17 +134,14 @@ def enqueue_request(self, request: Request) -> bool:
Returns:
True if the request was successfully enqueued, False otherwise.
"""
logger.debug(f'ApifyScheduler.enqueue_request was called (scrapy_request={request})...')

if not isinstance(self.spider, Spider):
raise TypeError('self.spider must be an instance of the Spider class')

apify_request = to_apify_request(request, spider=self.spider)
if apify_request is None:
logger.error(f'Request {request} could not be converted to Apify request.')
logger.warning(f'Request {request} could not be converted to Apify request; skipping it.')
return False

logger.debug(f'Converted to apify_request: {apify_request}')
if not isinstance(self._rq, RequestQueue):
raise TypeError('self._rq must be an instance of the RequestQueue class')

Expand All @@ -156,7 +153,6 @@ def enqueue_request(self, request: Request) -> bool:
logger.exception('Failed to enqueue the request to the request queue.')
raise

logger.debug(f'rq.add_request result: {result}')
return not bool(result.was_already_present)

def next_request(self) -> Request | None:
Expand All @@ -165,7 +161,6 @@ def next_request(self) -> Request | None:
Returns:
The next request, or None if there are no more requests.
"""
logger.debug('next_request called...')
if not isinstance(self._rq, RequestQueue):
raise TypeError('self._rq must be an instance of the RequestQueue class')

Expand All @@ -177,7 +172,6 @@ def next_request(self) -> Request | None:
logger.exception('Failed to fetch the next request from the request queue.')
raise

logger.debug(f'Fetched apify_request: {apify_request}')
if apify_request is None:
return None

Expand Down Expand Up @@ -206,5 +200,4 @@ def next_request(self) -> Request | None:
if scrapy_request is None:
return None

logger.debug(f'Converted to scrapy_request: {scrapy_request}')
return scrapy_request
3 changes: 1 addition & 2 deletions src/apify/storage_clients/_apify/_alias_resolving.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

import logging
from asyncio import Lock
from functools import cached_property
from logging import getLogger
Expand Down Expand Up @@ -225,7 +224,7 @@ async def store_mapping(self, storage_id: str) -> None:
alias_map[self._storage_key] = storage_id

if not self._configuration.is_at_home:
logging.getLogger(__name__).debug(
logger.debug(
'AliasResolver storage limited retention is only supported on Apify platform. Storage is not exported.'
)
return
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ async def _create_missing_metadata_for_input_file(key: str, record_path: Path) -
try:
content = await asyncio.to_thread(record_path.read_bytes)
except FileNotFoundError:
logger.warning(f'Input file disparaged on path: "{record_path}"')
logger.warning(f'Input file disappeared from path: "{record_path}"')
return

# Figure out the metadata from the file content
Expand Down
6 changes: 3 additions & 3 deletions tests/unit/actor/test_actor_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ async def test_metamorph_fails_locally(caplog: pytest.LogCaptureFixture) -> None

matching = [r for r in caplog.records if 'Actor.metamorph()' in r.message]
assert len(matching) == 1
assert matching[0].levelname == 'ERROR'
assert matching[0].levelname == 'WARNING'
assert 'only supported when running on the Apify platform' in matching[0].message


Expand All @@ -149,7 +149,7 @@ async def test_reboot_fails_locally(caplog: pytest.LogCaptureFixture) -> None:

matching = [r for r in caplog.records if 'Actor.reboot()' in r.message]
assert len(matching) == 1
assert matching[0].levelname == 'ERROR'
assert matching[0].levelname == 'WARNING'
assert 'only supported when running on the Apify platform' in matching[0].message


Expand All @@ -165,7 +165,7 @@ async def test_add_webhook_fails_locally(caplog: pytest.LogCaptureFixture) -> No

matching = [r for r in caplog.records if 'Actor.add_webhook()' in r.message]
assert len(matching) == 1
assert matching[0].levelname == 'ERROR'
assert matching[0].levelname == 'WARNING'
assert 'only supported when running on the Apify platform' in matching[0].message


Expand Down
6 changes: 3 additions & 3 deletions tests/unit/actor/test_actor_lifecycle.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,14 +164,14 @@ async def test_fail_without_init_raises_runtime_error() -> None:
await Actor().fail()


async def test_reboot_in_local_environment_logs_error_message(
async def test_reboot_in_local_environment_logs_warning_message(
actor: _ActorType,
caplog: pytest.LogCaptureFixture,
) -> None:
"""Test that reboot() logs an error when not running on the Apify platform."""
"""Test that reboot() logs a warning when not running on the Apify platform."""
await actor.reboot()

# Check that the error message was logged
# Check that the warning message was logged
assert 'Actor.reboot() is only supported when running on the Apify platform.' in caplog.text


Expand Down
2 changes: 1 addition & 1 deletion tests/unit/events/test_apify_event_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ def listener(data: Any) -> None:

async def test_unknown_event_is_logged(monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture) -> None:
"""Test that unknown events are logged and not emitted."""
caplog.set_level(logging.INFO, logger='apify')
caplog.set_level(logging.DEBUG, logger='apify')
async with (
_platform_ws_server(monkeypatch) as (connected_ws_clients, client_connected),
ApifyEventManager(Configuration.get_global_configuration()),
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/scrapy/test_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,13 @@ def test_enqueue_request_skips_non_serializable_request(
scheduler: ApifyScheduler,
caplog: pytest.LogCaptureFixture,
) -> None:
"""A request that cannot be converted (non-serializable meta) is not enqueued: returns False and logs an error."""
"""A request that cannot be converted (non-serializable meta) is not enqueued: returns False and logs a warning."""
rq = cast('mock.MagicMock', scheduler._rq)

# A set in `meta` is not JSON-serializable, so `to_apify_request` returns None.
scrapy_request = Request(url='https://example.com', meta={'tags': {'a', 'b'}})

with caplog.at_level(logging.ERROR, logger='apify.scrapy.scheduler'):
with caplog.at_level(logging.WARNING, logger='apify.scrapy.scheduler'):
result = scheduler.enqueue_request(scrapy_request)

assert result is False
Expand Down