Skip to content

Commit 4e63928

Browse files
committed
crawler changes
1 parent e4901c6 commit 4e63928

1 file changed

Lines changed: 2 additions & 3 deletions

File tree

say/crawler/__init__.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
 import requests
 from cachetools import TTLCache
 from cachetools import cached
+from urllib.parse import urljoin

 from say.config import configs
 from say.crawler.patterns import get_patterns
@@ -106,7 +107,6 @@ def get_data(self, force=False):


 class DigikalaCrawler:
-    PROXY = 'https://proxy.sayao.org/proxy?url=%s/'
     API_URL_NOT_FRESH = 'https://api.digikala.com/v2/product/%s/'
     API_URL_FRESH = 'https://api-fresh.digikala.com/v1/product/%s/'
     DKP_PATTERN = re.compile(r'.*/dkp-(\d+).*')
@@ -119,7 +119,6 @@ def __init__(self, url):

     def call_api(self, url):
         try:
-            print(url)
             with urllib.request.urlopen(url) as response:
                 status_code = response.getcode()
                 content = response.read().decode('utf-8')
@@ -128,7 +127,7 @@ def call_api(self, url):
         except urllib.error.URLError as e:
             # If there's an error, use proxy
             try:
-                proxy_url = self.PROXY
+                proxy_url = urljoin(configs.NEST_API_URL, "api/dao/crawler/digikala?url=%s/") % url
                 with urllib.request.urlopen(proxy_url) as proxy_response:
                     proxy_status_code = proxy_response.getcode()
                     proxy_content = proxy_response.read().decode('utf-8')

0 commit comments

Comments (0)