77import requests
88from cachetools import TTLCache
99from cachetools import cached
10+ from urllib .parse import urljoin
1011
1112from say .config import configs
1213from say .crawler .patterns import get_patterns
@@ -106,7 +107,6 @@ def get_data(self, force=False):
106107
107108
108109class DigikalaCrawler :
109- PROXY = 'https://proxy.sayao.org/proxy?url=%s/'
110110 API_URL_NOT_FRESH = 'https://api.digikala.com/v2/product/%s/'
111111 API_URL_FRESH = 'https://api-fresh.digikala.com/v1/product/%s/'
112112 DKP_PATTERN = re .compile (r'.*/dkp-(\d+).*' )
@@ -119,7 +119,6 @@ def __init__(self, url):
119119
120120 def call_api (self , url ):
121121 try :
122- print (url )
123122 with urllib .request .urlopen (url ) as response :
124123 status_code = response .getcode ()
125124 content = response .read ().decode ('utf-8' )
@@ -128,7 +127,7 @@ def call_api(self, url):
128127 except urllib .error .URLError as e :
129128 # If there's an error, use proxy
130129 try :
131- proxy_url = self . PROXY
130+ proxy_url = urljoin ( configs . NEST_API_URL , "api/dao/crawler/digikala?url=%s/" ) % url
132131 with urllib .request .urlopen (proxy_url ) as proxy_response :
133132 proxy_status_code = proxy_response .getcode ()
134133 proxy_content = proxy_response .read ().decode ('utf-8' )
0 commit comments