
    1i                       d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlmZm	Z	 d dl
mZmZmZmZ d dlmZmZ d dlmZmZ d dlmZ d dlZd dlmZ d dlmZ d d	lmZ d d
lmZ d dl m!Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z. d dl/m0Z0 d dl1m2Z2m3Z3 d dl4Z4d dl5Z5d dl6m7Z7 d dl8m9Z9 d dl:m;Z; d dl<m=Z= d dlm>Z> 	 d dl?m@Z@ dZA ej                  eD      ZEg dZFg dZGddd d!d"d#d$d%d&d'd(d)d*ZHd+eId,eIfd-ZJdd.eKd/eId0eIfd1ZLd2eeM   fd3ZNd4eeM   d2eeM   fd5ZOd6eMd2eeM   fd7ZPd6eMd2eeM   fd8ZQd9ed2eRfd:ZSdd;eMd<eMd=eId2ee   fd>ZTd? ZU ed@A      ZVdB ZWdCeMd2eMfdDZXdCeMd2eMfdEZYdFeMd2eeI   fdGZZdHeId2eeIeIf   fdIZ[dHeeI   d2eMfdJZ\ddCeMd;eMd2eeI   fdKZ]ddCeMd;eMd2eeI   fdLZ^	 	 ddMe.dCeMdHeeI   d<eMd;eMd=eIdNeeM   dOeKd2eRfdPZ_ddQeMdReKd2eRfdSZ`dT ZaddCeMdUeeM   dVeKd2eeM   fdWZbdXeMd2eMfdYZcdZeMd;eMd2eeI   fd[Zdd\d]d^d_d`dadbdcddZeg deZfh dfZgd;eMdgeKd2eMfdhZhdgeKd2eeI   fdiZidUeMd2eeeM   eeM   f   fdjZjdkeMdleMdmeeM   dneeM   d2eKf
doZkddCeMdUeMdXeMd2eeM   fdpZlddqeId0eIfdrZJddseIdteIfduZmdv Zndw ZoddxeRdyeRd2ej                  fdzZqd{ ZWd| ZrddUeMdNeMd2eRfd}Zsd~eMd2eeI   fdZtdd6eMdeKd2eRfdZud<eMd;eMd=eIdevd2ee   f
dZwd<eMd;eMd=eIdevd2ee   f
dZx	 ddUeMdeIdeKd2eeeI   eeM   f   fdZy G d d      Zzy# eB$ r dZAY 7w xY w)    N)datetime	timedelta)OptionalListDictTuple)urlparse	urlencode)UUIDuuid4)ThreadPoolExecutor)BeautifulSoup)	webdriver)Service)Options)expected_conditions)ChromeDriverManager)WebDriverWait)By)Keys)ActionChains)AsyncSession)select)HTTPExceptionstatus)Product)Vendor)	Violation)ScrapingResult)
quote_plus)sync_playwrightTF)zoMozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36zuMozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36zeMozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36zPMozilla/5.0 (Windows NT 10.0; Win64; x64; rv:124.0) Gecko/20100101 Firefox/124.0zrMozilla/5.0 (Macintosh; Intel Mac OS X 14_3) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.3 Safari/605.1.15zNMozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0z}Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0)zsocks4://103.146.170.233:5678zsocks4://103.165.64.86:4153zhttp://108.165.152.59:80zsocks4://103.204.54.50:1080z|text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7gzip, deflate, bres-ES,es;q=0.9,en;q=0.8	max-age=0z@"Not_A Brand";v="8", "Chromium";v="122", "Google Chrome";v="122"z?0z	"Windows"documentnavigatenonez?11)AcceptAccept-EncodingAccept-LanguageCache-Controlz	Sec-Ch-UazSec-Ch-Ua-MobilezSec-Ch-Ua-PlatformzSec-Fetch-DestzSec-Fetch-ModezSec-Fetch-SitezSec-Fetch-UserUpgrade-Insecure-Requestsmin_secondsmax_secondsc                 Z    t        j                  | |      }t        j                  |       y)z1Introduce a random delay to mimic human behavior.Nrandomuniformtimesleep)r.   r/   delays      B/var/www/html/marco-python-backend/app/services/scraper_service.pyhuman_delayr8   R   s    NN;4EJJu    attempt
base_delay	max_delayc                     |d| z  z  }t        j                  d|dz        }t        ||z   |      }t        j                  |       y)zc
    Calculate exponential backoff delay.
    Useful for retry logic when hitting rate limits.
       r   皙?N)r2   r3   minr4   r5   )r:   r;   r<   r6   jitterfinal_delays         r7   exponential_backoff_delayrC   X   sB    
 !w,'E^^Aus{+Fefni0KJJ{r9   returnc                      t         j                  j                  d      xs t         j                  j                  d      } | r| S t        rt	        j
                  t              }|S y)zx
    Get a free proxy from environment or predefined list.
    For production, consider using a paid proxy service.
    
HTTP_PROXY
http_proxyN)osenvirongetFREE_PROXIESr2   choice)	env_proxyproxys     r7   get_free_proxyrO   d   sJ     

|,L

|0LIl+r9   rN   c                 Z    | sy| j                  d      s| j                  d      r| S d|  S )zFormat proxy URL for Selenium.Nhttp://socks)
startswith)rN   s    r7   get_selenium_proxy_urlrT   u   s5    	"e&6&6w&?UGr9   queryc                 D   g }	 t         j                  d       t        j                  j	                  d      }|st         j                  d       |S d}|| dddd}t        j                  ||d	      }|j                          |j                         }|j	                  d
g       dd D ]  }d|v s|j                  |d           t         j                  dt        |       d       |S # t        $ r,}t         j                  dt        |              Y d}~|S d}~ww xY w)z
    Search using Tavily API (best for AI/automation).
    Tavily is designed for AI agents and is much more reliable than browser automation.
    z'[Discovery] Using Tavily API for searchTAVILY_API_KEYzB[Discovery] TAVILY_API_KEY not configured - skipping Tavily searchzhttps://api.tavily.com/searchF   )api_keyrU   include_answermax_resultsinclude_raw_content   )jsontimeoutresultsNurlu+   [Discovery] ✓ Tavily API search returned  resultsz&[Discovery] Tavily API search failed: )loggerinforH   rI   rJ   warningrequestspostraise_for_statusr^   appendlen	Exceptionstr)	rU   r`   rY   ra   payloadresponsedataresultes	            r7   search_tavily_apirr   ~   s   
 GJ=>**..!12NN_`N- ##(
 ==7B?!!#}} hhy"-bq1 	.Fve}-	. 	A#g,xXY N  J?AxHIINJs%   AC* AC* 08C* *	D3!DDc                     t        |       S )z
    Search using Tavily API instead of Selenium (avoids detection completely).
    Falls back to API-based search when Google blocks you.
    )rr   rU   s    r7   search_via_apiru      s    
 U##r9   rp   c           	      V   | j                  dd      | j                  dd      | j                  dd      | j                  dd      | j                  dd      | j                  dd      g}dj                  d	 |D              j                         }d
|v xs d|j                  dd      v }|S )a  
    Check if a SERP API result contains 'Soria Natural' brand.
    Searches in title, vendor name (source), and other product fields.
    
    Args:
        result: SERP API result dictionary
    
    Returns:
        True if 'Soria Natural' is found, False otherwise
    title sourcevendor_namebranddescriptionsnippet c              3   2   K   | ]  }t        |        y wN)rl   ).0fields     r7   	<genexpr>z+is_soria_natural_product.<locals>.<genexpr>   s     EESZEs   zsoria naturalu   soriañatural)rJ   joinlowerreplace)rp   fields_to_checkcombined_textis_soria_naturals       r7   is_soria_natural_productr      s     	

7B

8R 

="%

7B

="%

9b!O HHE_EEKKMM '-7l?mNcNcdgikNl;lr9   product_namebarcodemspc                    g }	 t         j                  d       t        j                  j	                  d      }|st         j                  d       |S |dk(  r,t         j                  d       t         j                  d       |S |  }t         j                  d| d       d	}d
|d|dd}	 t        j                  ||d      }|j                          |j                         }	|	s|S d|	v r)t         j                  d|	j	                  d              |S |	j	                  dg       }|st         j                  d       dD ]  }||	v s|	|   s t         j                  d       |rd|v r| j                         }	 d
||d}t        j                  ||d      }|j                          |j                         }|j	                  dg       }|r%t         j                  dt        |       d       |}nt         j                  d       d}d}d}t#        |dd        D ]  \  }}	 |d!z  }|j	                  d"d#      }|j	                  d$d%      xs |j	                  d&d%      }|j	                  d'd%      }t%        |      s@|d!z  }t         j                  d(|d!z    d)| d*       t         j'                  d+|dd,         |d!z  }|j	                  d-      }|y|j	                  d.d%      }|re	 t        |      j)                  d/d%      j)                  d0d%      j)                  d1d%      j)                  d2d3      j                         }t+        |      }|s7||||d4d5}|r|r||k  rd6|d7<   d6|d8<   nd9|d7<   d9|d8<   n
d:|d7<   d:|d8<   |j1                  |       t         j                  d;|d!z    d)| d<|dd=  d>|rd1|d?nd@ dA|d7   j3                          dB        t         j                  dDdE        t         j                  dF       t         j                  dG|        t         j                  dH|        t         j                  dI|        t         j                  dJdE dK       t         j                  dLt5        d t        |             dMt        |       dNt        |       dO       |S # t        j                  j                  $ r-}
t         j                  dt        |
              |cY d}
~
S d}
~
wt        $ r-}t         j                  dt        |              |cY d}~S d}~ww xY w# t         $ r,}t         j                  dt        |              Y d}~Yd}~ww xY w# t        t,        t.        f$ r d}Y 3w xY w# t         $ r/} t         j                  dC| d)t        |               Y d} ~ d} ~ ww xY w# t         $ r.} t         j                  dPt        |        dQR       Y d} ~ |S d} ~ ww xY w)Sa  
    Search using SERP API Google Shopping Light Engine.
    Filters results to only include 'Soria Natural' branded products.
    Uses direct HTTP requests to SERP API (more reliable than library).
    
    Args:
        product_name: Product name for search
        barcode: Product barcode
        msp: Minimum Selling Price (optional, used for violation detection)
    
    Returns:
        List of dictionaries with product and vendor information (filtered for Soria Natural only)
    zG[Discovery SERP] Using SERP API Google Shopping Light Engine for searchSERP_API_KEYuD   [Discovery SERP] ❌ SERP_API_KEY not found in environment variablesyour_serp_api_key_hereu=   [Discovery SERP] ❌ SERP_API_KEY is set to placeholder valuezV[Discovery SERP] Please replace 'your_serp_api_key_here' with your actual SERP API keyz [Discovery SERP] Search query: ''https://serpapi.com/search.jsongoogle_shopping_lighteszSpain )engineqglrY   location   paramsr_   z&[Discovery SERP] HTTP request failed: Nz0[Discovery SERP] Failed to parse JSON response: errorz[Discovery SERP] API Error: shopping_resultsu=   [Discovery SERP] ⚠️ No 'shopping_results' key in response)r`   productsitemssearch_resultsorganic_resultszW[Discovery SERP] No results found. Trying fallback: searching with product name only...r~   )r   r   rY   u7   [Discovery SERP] ✓ Fallback search successful! Found z results with product name onlyuI   [Discovery SERP] ⚠️ Fallback search also returned no shopping_resultsz)[Discovery SERP] Fallback search failed: r   r]      ry   Unknownproduct_linkrx   linkrw      [Discovery SERP] ⊘ Result : z) - FILTERED OUT (Not Soria Natural brand)z[Discovery SERP]   Title: P   extracted_priceprice$   €u   ₹,.serp_api_shopping_light)rz   
vendor_urlscraped_priceproduct_titlery   	violationr   compliance_status	compliantunknown   [Discovery SERP] ✓ Result  - <   
 - Price: .2fN/A - Status: u    ✅ SORIA NATURALz&[Discovery SERP] Error parsing result 
[Discovery SERP] P================================================================================z4[Discovery SERP] SORIA NATURAL BRAND FILTER SUMMARY:z9[Discovery SERP]   Total results received from SERP API: u5   [Discovery SERP]   ✅ Soria Natural products found: u@   [Discovery SERP]   ⊘ Non-Soria Natural products filtered out: [Discovery SERP] 
u:   [Discovery SERP] ✓ SERP API search completed. Processed z out of z available results, returned z$ Soria Natural products for analysisz)[Discovery SERP] SERP API search failed: Texc_info)rc   rd   rH   rI   rJ   r   rf   rh   r^   
exceptionsRequestExceptionrl   
ValueErrorre   striprj   rk   	enumerater   debugr   floatAttributeError	TypeErrorri   upperr@   )!r   r   r   r`   rY   search_queryserp_urlr   rn   ro   	req_error
json_errorr   alt_keyname_only_queryfallback_paramsfallback_responsefallback_datafallback_resultsfallback_errortotal_checkedtotal_soria_naturaltotal_filtered_outidxrp   rz   r   rw   r   	price_strprice_cleanresult_dictrq   s!                                    r7   search_serp_apir      s    GjZ]^**..0 LL_`N..LLXYLLqrN '6|nAFG 5 . 
		||HVRHH%%'==?D N d?LL78I7JKLN  88$6;  NNZ\ a d?tG}
 NNtv 3,."."4"4"6f"9,#*'O
 )1Xo_a(b%%668$5$:$:$<M'4'8'89KR'P$'&]^abr^s]t  uT  %U  V+;()rt
 $%5cr%:; ?	KC>" %jj9=%zz."=WFTVAW

7B/ 07&!+&KK">sQwir+V  !A  BLL#=eCRj\!JK#q(# 

#45= &

7B 7I )*-i.*@*@b*I*Q*QRWY[*\*d*dejln*o*w*wx{  ~A  +B  +H  +H  +JK$)+$6E $ $/".%*%*7 Ss{0;H-;F$780;H-;F$78,5K)7@K 34{+:37)2k]RUVbcfdfVgUhhr  INuxy~  @C  yD  tE  TY  sZ  Ze  fq  rz  f{  fA  fA  fC  eD  DV  W  Xw?	D 	)&23JLOP]_`KL_K`abVWiVjkl'xr23PQTUWY\]mYnQoPppxy|  ~N  zO  yP  Pm  nq  ry  nz  m{  {_  `  	a
 NS ""33 	LLA#i.AQRSN 	LLKCPZOK\]^N	` ! fNN%NsSaObNc#deefH !+NIF )$(E)B  !GuBsSTvhWX  Z@QITXYYNZs  AV> 0V> &V> )8R- !V> %,V> 2V> V> -V> 9BT. V> !BV:V> ;,V(A$U&VV> BVCV> -T+
"S2,T+-V> 2T+>"T& T+!V> &T++V> .	U#7!UV> U##V> &V <V?V  V	V;$V60V> 6V;;V> >	W5#W00W5c                     t        j                         } | dk7  ryt        j                  d       g d}g }|D ]=  }	 t	        j
                  d|gdd      }|j                  dk7  r|j                  |       ? |r=t        j                  d	d
j                  |              t        j                  d       yd}	 t	        j
                  ddgdd      }|j                  dk(  rB|j                  j                         j                         }t        j                  d|        d}|sFg d}|D ]=  }	t         j"                  j%                  |	      s#t        j                  d|	        d} n |st        j                  d       y	 t	        j
                  ddgddd      }|j                  dk(  rK|j                  j                         r1t        j                  d|j                  j                                 yt        j                  d|j&                          t        j                  d       y# t        $ r |j                  |       Y w xY w# t        $ r,}t        j                  dt        |              Y d}~_d}~ww xY w# t        j(                  $ r t        j                  d       Y yt        $ r+}t        j                  dt        |              Y d}~yd}~ww xY w)zFEnsure required system dependencies are installed for Chrome on Linux.LinuxTzF[Chrome Dependencies] Checking Chrome and system utilities on Linux...)readlinkdirnamecatbasenamegrepsedawkwhichr>   capture_outputr_   r   u7   [Chrome Dependencies] ✗ Missing core shell commands: z, zZ[Chrome Dependencies] DevOps must install: sudo apt-get install -y coreutils grep sed gawkFgoogle-chromerX   u0   [Chrome Dependencies] ✓ Chrome found in PATH: z,[Chrome Dependencies] Could not check PATH: N)/usr/bin/google-chrome/usr/bin/google-chrome-stable/usr/bin/chromiumu+   [Chrome Dependencies] ✓ Chrome found at: uG   [Chrome Dependencies] ✗ Chrome NOT found - DevOps must install Chrome	--version
   r   textr_   u-   [Chrome Dependencies] ✓ Chrome executable: u3   [Chrome Dependencies] ✗ Chrome --version failed: zT[Chrome Dependencies] This indicates missing shell commands (readlink, dirname, cat)uT   [Chrome Dependencies] ✗ Chrome --version timed out (likely missing shell commands)u3   [Chrome Dependencies] ✗ Could not verify Chrome: )platformsystemrc   rd   
subprocessrun
returncoderi   rk   r   r   stdoutdecoder   r   rl   rH   pathexistsstderrTimeoutExpired)
r   required_commandsmissing_commandscmdrp   chrome_foundchrome_pathrq   pathsr   s
             r7   ensure_chrome_dependenciesr    s   __F
KKXY Y  )	)^^WcN4QRSF  A% '',	) NtyyYiOjNklmqr LN/ :4YZ[! --..0668KKKJ;-XYL
 ` 	Dww~~d#I$PQ#		 ^_+ >tZ^hjk!fmm&9&9&;KKGH[H[H]G^_`LLNv}}o^_LLopS  	)##C(	)   NCCF8LMMN4 $$ kl J3q6(STsO   :H3:A+I A4J ;7J 3II	J	!JJ	(K)6K)>!K$$K)r   )max_workersc                 ^   	 t        | d      ri| j                  r\| j                  }t        j                  j	                  |      r0t        j                  |d       t        j                  d|        yyyy# t        $ r+}t        j                  dt        |              Y d}~yd}~ww xY wz(Clean up Chrome session temporary files._user_data_dirTignore_errorsz,[Chrome Cleanup] Removed session directory: z$[Chrome Cleanup] Failed to cleanup: N)hasattrr
  rH   r   r   shutilrmtreerc   r   rk   re   rl   driveruser_data_dirrq   s      r7   cleanup_chrome_sessionr    s    H6+,1F1F"11Mww~~m,m4@KM?[\ - 2G,
  H=c!fXFGGH   A2A8 8	B,!B''B,ra   c                 P    t        |       }|j                  j                  dd      S )zExtract domain name from URL.www.rx   )r	   netlocr   )ra   parseds     r7   get_domain_from_urlr    s"    c]F==  ,,r9   c                 :   	 t        | j                         j                               }|j                  j	                  dd      j                  d      d   }|j                  j                  d      }| | S #  | j                         j                         cY S xY w)z
    Normalize a URL for comparison purposes.
    Removes www, trailing slashes, query parameters to compare similar URLs.
    r  rx   :r   /)r	   r   r   r  r   splitr   rstrip)ra   r  domainr   s       r7   normalize_url_for_comparisonr     s    
	##))+++-.&&vr288=qA{{!!#&$  #yy{  ""s   A5A8 8 Br   c                     	 t        j                  ddt        |       j                               }t	        |j                  dd            S # t        t        f$ r Y yw xY w)z Normalize price string to float.z[^\d.,]rx   r   r   N)resubrl   r   r   r   r   r   )r   cleans     r7   normalize_pricer%    sS    z2s9~';';'=>U]]3,--' s   AA AAr   c                     	 | rt        |       nd}t        ||z
  d      }t        |dkD  r||z  dz  ndd      }||fS # t        t        f$ r+}t        j                  dt        |              Y d}~yd}~ww xY w)a   Calculate price difference and percentage difference.
    
    Args:
        msp: Minimum Selling Price (can be Decimal from database or float)
        scraped_price: Price found via scraping (always float)
    
    Returns:
        Tuple of (price_difference, percentage_difference)
    r   r>   d   z8[Price Calculation] Error calculating price difference: N)        r(  )r   roundr   r   rc   re   rl   )r   r   	msp_float
difference
percentagerq   s         r7   calculate_price_differencer-     s    "%E#J1	9}4a8
Y]J2S8PQSTU
:%%z" QRUVWRXQYZ[s   := A7!A22A7c                     	 |y| rt        |       nd}|y||k  ry||kD  ryy# t        t        f$ r+}t        j	                  dt        |              Y d}~yd}~ww xY w)a2  Determine compliance status based on price comparison.
    
    Args:
        msp: Minimum Selling Price (can be Decimal from database or float)
        scraped_price: Price found via scraping (optional float)
    
    Returns:
        Status string: 'violation', 'complain', 'compliant', or 'unknown'
    Nr   r   complainr   z.[Compliance Status] Error determining status: )r   r   r   rc   re   rl   )r   r   r*  rq   s       r7   determine_compliance_statusr0    sq      #&E#J4	9$Y&z" GAxPQs    $ $ $ $ A!AAc           
        K   	 t         j                  d| dd         t        j                  t              dddddd	dd
d	}	 t        j                  | |d      }|j                  dk(  r0t         j                  d| dd  d       t        | |       d{   S |j                          |j                  }g d}|D ]y  }t)        j*                  ||t(        j,                        }	|	s+|	j/                  d      j1                  dd      }
	 t3        |
      }t         j5                  d| d| dd  d       |c S  t         j                  d| dd         y7 # t
        j                  j                  $ r}|j                  j                  dk(  r6t         j                  d| dd  d       t        | |       d{  7  cY d}~S t         j                  d|j                  j                   d| dd         Y d}~yd}~wt
        j                  j                  t
        j                  j                   t
        j                  j"                  f$ rK}t         j                  dt%        |      j&                   d       t        | |       d{  7  cY d}~S d}~ww xY w# t6        t8        f$ r Y w xY w# t:        $ r1}t         j                  d| dd  d t=        |              Y d}~yd}~ww xY ww)!ak  
    Scrape price from vendor website.
    Tries to extract price from HTML content using multiple patterns.
    Falls back to Selenium if requests library is blocked (403 Forbidden).
    
    Args:
        url: Website URL to scrape
        product_name: Product name (for context, optional)
    
    Returns:
        Price as float if found, None otherwise
    z3[Website Scraper] Attempting to scrape price from: Nr   zJtext/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8r#   r"   zhttps://www.google.com/r(   z
keep-aliver$   )	z
User-Agentr)   r+   r*   RefererDNT
Connectionr-   r,   r   )headersr_   i  u*   [Website Scraper] 🚫 403 Forbidden from r   z0 - requests blocked, falling back to Selenium...z - falling back to Seleniumz[Website Scraper] HTTP z error for z"[Website Scraper] Request failed (z), falling back to Selenium...)   €\s*([\d,]+\.?\d*)u   €\s*([\d]+,[\d]{2})   ([\d,]+\.?\d*)\s*€u   ([\d]+,[\d]{2})\s*€z,["\']?price["\']?\s*:\s*["\']?([\d,]+\.?\d*)z,data-price["\']?\s*=\s*["\']?([0-9,]+\.?\d*)z price\s*=\s*["\']?([\d,]+\.?\d*)z,precioNumerico\s*[:\s=]+["\']?([\d,]+\.?\d*)u@   <span[^>]*class=["\'].*price.*["\'][^>]*>.*?€\s*([\d,]+\.?\d*)u?   <div[^>]*class=["\'].*price.*["\'][^>]*>.*?€\s*([\d,]+\.?\d*)u"   <span[^>]*>.*?€\s*([\d,]+\.?\d*)z:(?:price|precio|coste|costo)\s*[:\s=]+["\']?([\d,]+\.?\d*)u"   (\d+[.,]\d{2})(?:\s*€|\s+euros?)r   r   r   u%   [Website Scraper] ✓ Found price € from z (via requests)u5   [Website Scraper] ⚠️ No price found in HTML from z,[Website Scraper] Unexpected error scraping r   )rc   r   r2   rL   GOOGLE_USER_AGENTSrf   rJ   status_codere   _scrape_price_with_seleniumrh   r   r   	HTTPErrorrn   TimeoutConnectionErrorr   type__name__r"  search
IGNORECASEgroupr   r   rd   r   r   rk   rl   )ra   r   r5  rn   html_contenthttp_errrq   price_patternspatternmatchr   r   s               r7   scrape_price_from_websiterI  3  s    OJ3sPR8*UV !--(:;b820&),(

	H||C"EH ##s*!KCPSQSH:  VF   G  H8lKKK%%'#==L
. & 		GIIg|R]]CE!KKN223<	!),EKK"GwfUXY\Z\U]T^^m no L		 	NsSVTVxjYZi L
 "",, 	  ,,3!KCPSQSH:Upqr8lKKKK!89J9J9V9V8WWbcfgjhjckblmn##++X-@-@-P-PRZReReRvRvw 	HLL=d1g>N>N=OOmno4S,GGGG	HH #I.   Ec#2hZrRUVWRXQYZ[s  K->J0 AE EE K-E 60J0 '!J0 	+J4J0 6K-7J0 K-E J3AG97F:8G9<J=J0 K-2G94J0 8K-9AJ;JJ
JJJ0 K-JJ0 J-)J0 ,J--J0 0	K*9'K% K-%K**K-c           	      8  K   d}	 t         j                  d| dd  d       t         j                  d       t        d      }|j	                  |        t         j                  d       t        d	      D ]  }	 |j                  d
      }t        |j                  d            }|dz  dk(  r!t         j                  d|dz    d| d|        |r |dkD  rt         j                  d| d        nt        dd        |j                  }t        |      }t         j                  d| d       |dk  rVd|v rRt         j                  d       t         j                  d| dd  d       	 |r	 t        |       |j                          yyt        |      }|r@t         j                  d| d| dd  d        ||r	 t        |       |j                          S S |j                  d      }	t        j                  d!|	      }
|
rt         j                  d"t!        |
              t#        t!        d# |
D              d$      }|D ]Q  }d%|cxk  rd&k  sn t         j                  d| d| dd         |c |r	 t        |       |j                          S S  t         j                  d'| dd         t         j                  d(       	 |r	 t        |       |j                          yy#  t        dd       Y xY w#  Y yxY w#  Y S xY w#  Y S xY w#  Y yxY w# t$        $ rQ}t         j                  d)t'        |              Y d}~|r$	 t        |       |j                          y#  Y yxY wyd}~ww xY w# |r$	 t        |       |j                          w #  Y w xY ww xY ww)*a=  
    Fallback to Selenium for scraping when requests library is blocked.
    Used for sites with bot detection (403 Forbidden) including Cloudflare protection.
    
    Note: Cloudflare protection is very difficult to bypass. This function attempts
    to scrape the page but may fail on heavily protected sites.
    Nz[Selenium Fallback] Loading F   z with Selenium...z[[Selenium Fallback] Note: Site may have Cloudflare protection which blocks automated accessTheadlesszW[Selenium Fallback] Waiting for page to load (including potential Cloudflare bypass)...   z)return document.readyState === 'complete'return document.body.innerTextrX   r   z[Selenium Fallback] Attempt r   z: readyState=z, text_length=  z)[Selenium Fallback] Page content loaded ( chars)      ?z$[Selenium Fallback] Page HTML size: z bytesi  __cf_chl_tkuT   [Selenium Fallback] ⚠️ Detected Cloudflare challenge - page blocker not bypassedz[Selenium Fallback] URL: z$ is protected by Cloudflare anti-botu'   [Selenium Fallback] ✓ Found price €r8  r   z (via Selenium/JSON-LD)z\d{1,3}[.,]\d{2}z/[Selenium Fallback] Found price-like patterns: c              3   R   K   | ]  }t        |j                  d d             ! yw)r   r   N)r   r   r   ps     r7   r   z._scrape_price_with_selenium.<locals>.<genexpr>  s      &RauQYYsC-@'A&Rs   %')reverse      ?     @u-   [Selenium Fallback] ⚠️ No price found on zV[Selenium Fallback] Suggestions: URL may require search, or be protected by Cloudflarez)[Selenium Fallback] Error with Selenium: )rc   rd   r   initialize_selenium_driverrJ   rangeexecute_scriptrj   r8   page_sourcere   r  quitextract_price_from_htmlr"  findallsetsortedrk   rl   )ra   r   r  r:   readytext_lengthr]  	html_sizer   	page_textpricesprices_sortedrq   s                r7   r;  r;    s     FF23s8*<MNOrs+T:

3 	mnRy 	$G$--.YZ!&"7"78X"YZQ;!#LL#?	{-X]W^^lmxly!z{[3.KK"KK=X_ `aAs#	$& (($	;I;fMN t <NNqsNN6s3Bxj@def8 &v. 3 (4KKA%sSVTVxjXopq, &v. ' ))*JK	/;LLJ3v;-XY"3&R6&R#R\`aM& !''KK"I%PVWZ[^\^W_V` ab L &v. !
 	Fs3BxjQRmo &v. Q$As#V  B3q6(KL&v. 	 &v. s"  NA1L 8A4K",L .K":A,L 'N*K6 N-L 4N7K=NA7L $L 0N3LN1L NL  N"K30L 6K:8N=L?NLNLN	M,!M'<M/  NM NM#!N'M,,M/ /N3NNNNNdbrz   packsc                 :  K   ddl m} 	 ||r|sy|rt        t              j	                   |t        j
                  |k(  t        j                  |k(  t        j                  |k(  t        j                  |k(  t        j                  |k(  t        j                  |k(              }	| j                  |	       d{   }
|
j                         j                         }|r&t        j                  d| d| d| d| d	| d
       y|rt        t              j	                   |t        j
                  |k(  t        j                   |k(  t        j                  |k(  t        j                  |k(  t        j                  |k(              }	| j                  |	       d{   }
|
j                         j                         }|r%t        j                  d| d| d|dd  d|        yy7 )7 M# t"        $ r+}t        j%                  dt'        |              Y d}~yd}~ww xY ww)aV  
    Check if a violation record with MATCHING vendor already exists for this product/price.
    Prevents duplicates created by finding the same vendor from different sources
    (e.g., registered vendor scraping vs SERP API discovery).
    
    Considers it a duplicate if BOTH of these match:
    - product_name
    - barcode_number
    - msp
    - scraped_price
    - vendor_name (NOT url, since same vendor can be found via different URLs)
    
    This prevents duplicate violations for the same vendor selling at the same price.
    
    Returns True if duplicate found, False otherwise.
    r   )and_NFz [Duplicate] Vendor match found: z	 selling z (barcode: z) @  (MSP: )Tz#[Duplicate] Exact URL match found: z), URL: r   	, Price: z[Duplicate Check] Error: )
sqlalchemyrl  r   r   wherebarcode_numberr   r   r   rz   rj  executescalarsfirstrc   rd   ra   rk   re   rl   )ri  ra   r   r   r   r   rz   rj  rl  stmtrp   existingrq   s                r7   check_duplicate_violationrx    s&    4  - | )$**,,7**l:MMS(++}<))[8OOu,	D ::d++F~~'--/H>{m9UaTbbmnumvvz  |I  {J  JQ  RU  QV  VW  X  Y )$**,,7MMS(++}<**l:MMS(D ::d++F~~'--/HA,{[bZccklopsqsltkuu~  @M  N  O  P3 ,$ ,  23q6(;<sl   HG$ HB%G$ 6G7A	G$  HBG$ G"AG$ HG$ "G$ $	H-!HHHHr   min_gbc                 >   	 ddl } |j                  |       }|j                  dz  }||k  rt        j	                  d|dd| d       yt        j                  d	|dd
| d       y# t        $ r+}t        j                  dt        |              Y d}~yd}~ww xY w)z.Check if there's enough disk space for Chrome.r   Ni   @u%   [Disk Space] ✗ Insufficient space: r   zGB available, need GBFu   [Disk Space] ✓ Available: zGB (threshold: zGB)Tz)[Disk Space] Could not check disk space: )	r  
disk_usagefreerc   r   r   rk   re   rl   )r   ry  r  statavailable_gbrq   s         r7   check_disk_spacer  /  s     v  &yyI.& LL@c@RReflemmopq3L3E_U[T\\_`a B3q6(KLs   AA( 
A( (	B1!BBc                     t        j                         } | dk7  ry	 t        j                  g ddd       t        j                  d       y# t        $ r+}t        j                  dt        |              Y d}~yd}~ww xY w)	z0Kill any orphaned Chrome/ChromeDriver processes.r   N)pkillz-9z-fchromeTrX   r   z2[Process Cleanup] Killed orphaned Chrome processesz,[Process Cleanup] Could not kill processes: )	r   r   r   r   rc   rd   rk   r   rl   )r   rq   s     r7   !cleanup_orphaned_chrome_processesr  A  sk    __FN6tUVWHI NCCF8LMMNs   /A 	B !A;;B r   r_   c                 .   t         st        j                  d       y	 t        j                  d       t	               5 }t        j                  d       |j                  j                  d      }|j                  t        j                  t              dd	      }t        j                  d
|  d       |j                  | d|dz         t        j                  d       	 |j                  d       t        j                  t        j                   dd             |j                  d       |j#                  d       t%        dd       |rt        j                  d| d       	 |j'                  d      }|rs|j)                          |j+                  |       |j,                  j/                  d       |j#                  d       t%        dd       t        j                  d| d       nt        j                  d       t        j                  d       |j5                         }|j7                          |r;t9        |      dkD  r-t        j                  d t9        |       d!       |cddd       S t        j                  d"       |r|ndcddd       S #  Y oxY w# t0        $ r+}t        j                  dt3        |              Y d}~d}~ww xY w# 1 sw Y   yxY w# t0        $ r+}t        j;                  d#t3        |              Y d}~yd}~ww xY w)$a  
    Use Playwright to bypass Cloudflare and return the rendered HTML for extraction.
    This avoids Cloudflare detection while keeping extraction logic in Selenium/BeautifulSoup.
    
    Args:
        url: Website URL to load
        search_query: Optional search term to execute on the page
        timeout: Timeout in seconds
    
    Returns:
        HTML content of the page or None if failed
    z?[Playwright] Playwright not available, cannot bypass CloudflareNz5[Playwright] Using Playwright to bypass Cloudflare...z*[Playwright] Launching Chromium browser...TrL  zes-ESzEurope/Madrid)
user_agentlocaletimezone_idz[Playwright] Navigating to ...load  )
wait_untilr_   z2[Playwright] Applying human behavior simulation...z.window.scrollTo(0, document.body.scrollHeight)rX        ?z
                Object.defineProperty(navigator, 'webdriver', {
                    get: () => undefined
                });
                domcontentloadedz[Playwright] Searching for 'z'...zPinput[type="search"], input[placeholder*="buscar"], input[placeholder*="search"]Enter       @z#[Playwright] Search completed for 'r   z![Playwright] Search box not foundz[Playwright] Search failed: z$[Playwright] Extracting page HTML...rP  u3   [Playwright] ✓ Successfully bypassed Cloudflare (z bytes)u@   [Playwright] ⚠ Page content is too small, may still be blockedz"[Playwright] Failed to load page: )PLAYWRIGHT_AVAILABLErc   re   rd   r!   r   chromiumlaunchnew_pager2   rL   r9  gotoevaluater4   r5   r3   wait_for_load_stater8   query_selectorclickfillkeyboardpressrk   rl   contentcloserj   r   )	ra   r   r_   rV  browserpage
search_boxrq   rD  s	            r7   get_page_html_via_playwrightr  O  s     XYKKL D	>!LLEFjj'''6G ##!==);<+ $ D LL6se3?@IIcfgnIE LLMNNO

6>>#s34    $$%78S! ;L>NOL!%!4!4j"J ""((*"5++G4 008#C-'J<.XY%Z['JK
 LL?@<<>L MMOL 1C 7QRUVbRcQddklm#CD	> D	>F ab'3|ID	> D	>80 ! LNN%A#a&#JKKLkD	> D	>L  9#a&BCs   K  B"K AJ+8K$BJ?A(K'	K  1K	K  JK	K&!KKKKKK  K   	L)!LLvendor_websitec           	      r   | sy| j                         j                         }|j                  d      sd|z   }	 t        |      }|j                  j                         }|r|j                  d      sd|z   }d| }|S # t
        $ r0}t        j                  d|  dt        |              |cY d}~S d}~ww xY w)u  
    Normalize vendor website URL for dictionary lookup.
    Converts various formats to a standard form for matching against VENDOR_SEARCH_URL_TEMPLATES keys.
    
    Examples:
        "https://www.dosfarma.com/" → "https://www.dosfarma.com"
        "dosfarma.com" → "https://www.dosfarma.com"
        "www.dosfarma.com" → "https://www.dosfarma.com"
        "http://dosfarma.com/path" → "https://www.dosfarma.com"
    
    Args:
        vendor_website: Raw vendor website URL (may be incomplete or have trailing slashes)
    
    Returns:
        Normalized URL in format: https://www.domain.com (no trailing slash, no path)
    rx   )rQ   https://r  r  z$[Normalization] Could not normalize r   N)	r   r   rS   r	   r  rk   rc   re   rl   )r  	url_cleanr  r  
normalizedrq   s         r7   #normalize_vendor_website_for_lookupr    s    " $$&,,.I  78*	)$$$& &++F3f_F  x(
 =n=MRPSTUPVxXYs   AA= =	B6%B1+B61B6htmlc                    t        | d      }g d}|D ](  }|j                  |      D ]  }|j                           * g d}|D ]  }|j                  |      }|s|j	                  dd      xs |j                  d      }|s@	 t        t        |      j                  dd	      j                               }	d
|	cxk  rdk  r"n nt        j                  d| d|	        |	c S  g }
|j                  ddi      D ]}  }|j	                  dd      xs |j                  d      }|s,	 t        t        |      j                  dd	      j                               }	d
|	cxk  rdk  rn n|
j                  |	        |
r2t!        |
      }	t        j                  d|	 dt#        |
       d       |	S |j%                         }|j                  d      D ]  }||j%                         v s|j&                  }t)        d      D ]  }| 4|j                         }t+        j,                  d|      }|rUt/        d |d   D              }	 t        |j                  d	d      j                  dd	            }	d
|	cxk  rdk  r	n n|	c c S |j&                  }  y# t        t        f$ r Y bw xY w# t        t        f$ r Y w xY w# t        $ r Y Hw xY w)uG  
    Extract the MAIN product price from a product page HTML.

    Root problem on PrestaShop bundle pages:
      - The main product price is in a SPAN with a content attribute:
          <span id="our_price_display" itemprop="price" content="12.001">12,00 €</span>
      - The "Contenido del paquete" section below uses a META tag:
          <meta itemprop="price" content="5.4">
      - Old code searched only for <meta itemprop="price">, so it found 5.4
        and never saw the span's content="12.001".

    Fix strategy:
      1. Strip all known bundle-component / related-product sections from the
         soup FIRST (section#blockpack, .related-products, etc.) so their
         itemprop elements are gone before any search.
      2. Search ALL elements (any tag) with itemprop="price" — read the
         content attribute first (machine-readable), fall back to text.
         Within the cleaned soup, take the MAXIMUM valid price.
      3. Platform-specific high-confidence selectors as an additional pass.
      4. DOM-walking regex near product name text as final fallback.
    html.parser)zsection#blockpackz
#blockpackz
.blockpackz#related-productsz.related-productsz#crosssellingz.crosssellingz#upsellz.upsellz.cross-sellsz	.up-sellsz#accessoriesz.accessoriesz#similar_productsz.similar_products)#our_price_display#buy_block [itemprop='price']".content_prices [itemprop='price']$.box-info-product [itemprop='price']1.product-prices .current-price [itemprop='price']".product-prices [itemprop='price'].summary .price > .amount).summary .price .woocommerce-Price-amount.product-summary .price .amount&[itemprop='offers'] [itemprop='price']#product-price%.product-info-main [itemprop='price']z".product-detail [itemprop='price']r  rx   Tr   r   r   r       @@z [PriceExtract] High-confidence '': itempropr   attrsz"[PriceExtract] Max itemprop price= (z elements after noise removal))r   rX   NuM   (\d{1,3}(?:[.,]\d{3})*[.,]\d{2})\s*€|€\s*(\d{1,3}(?:[.,]\d{3})*[.,]\d{2})c              3   &   K   | ]	  }|s|  y wr    rU  s     r7   r   z-extract_price_near_product.<locals>.<genexpr>L  s     $@1aQ$@s   r   )r   r   	decompose
select_onerJ   get_textr   rl   r   r   rc   r   r   r   find_allri   maxrj   r   parentr[  r"  r`  next)r  r   soupNOISE_SELECTORSseltagHIGH_CONFIDENCE_SELECTORSelrawr   
all_prices	full_nameelementr  _r   matchesr   s                     r7   extract_price_near_productr    s   , }-D
O"  ;;s# 	CMMO	! ) __S!ffY#>r{{{'>	#c(**34::<=Ee$u$?uCwOP( Jmm:w"7m8 	ffY#>r{{{'>	#c(**34::<=Ee$u$!!%(	 J9%3z?BSSqrs ""$I==d=+ ''^^F1X '>(**d  $$@
$@ @I %i&7&7R&@&H&Hc&R S%050#(L  !''( [ I& 		  I& 		4 & s8   AJ)AJ*:K J'&J'*J=<J= 	KKz8https://www.dosfarma.com/catalogsearch/result/?q={query}z!https://farmaes.es/?mot_q={query}z#https://linverd.com/en?km_q={query}z-https://www.farmaciasdirect.eu/?query={query}zhttps://herbolariorosana.com/resultado-busqueda?controller=search&orderby=position&orderway=desc&id_manufacturer_search=0&search_query={query}&submit_search=z4https://www.parafarma24.com/es/buscar?search={query}zFhttps://herbolarioemperatriz.es/module/iqitsearch/searchiqit?s={query}zuhttps://herboahorro.com/search?type=product&options%5Bunavailable_products%5D=show&options%5Bprefix%5D=last&q={query})zhttps://www.dosfarma.comzhttps://www.farmaes.eszhttps://www.linverd.comzhttps://www.farmaciasdirect.euz https://www.herbolariorosana.comzhttps://www.parafarma24.comz#https://www.herbolarioemperatriz.eszhttps://www.herboahorro.comr   r>         rX         >   demgmlofandcondelforlaslossinthexxixxlpackparaplussxxiforterj  siglosoriacomplexnaturalcapsulascapsulesextractotabletascomprimidos
pack_countc                 >    | j                         }|dk(  r|S | d| S )z
    Generate the search query for a given product and pack count.

    Pack 1  -> base name as-is   (e.g. "RESVERASOR PLUS")
    Pack N  -> base + " Pack N"  (e.g. "RESVERASOR PLUS Pack 2")
    r    Pack r  )r   r  bases      r7   generate_search_queryr  w  s/     DQV6*&&r9   c                     dddddddd}|j                  |      }|rt        | |d	      }|	 t        |      S 	 t        | j
                        S # t        t        f$ r Y 'w xY w# t        t        f$ r Y y	w xY w)
z
    Return the pre-calculated price threshold for a given pack count.
    Reads price_1_pack ... price_12_pack from the product row.
    Falls back to product.msp if the specific column is missing / None.
    price_1_packprice_2_packprice_3_packprice_4_packprice_5_packprice_6_packprice_12_packr  N)rJ   getattrr   r   r   r   )productr  attr_mapattrvals        r7   get_pack_pricer    s     H <<
#DgtT*?Sz!W[[!! z*  z" s"   
A A$ A! A!$A65A6c                    | j                         }d}g }d}|t        |      k  r||   }|j                         }|dk(  r5|dz   t        |      k  r$||dz      j                         r||dz      }|dz  }]|t        v s|j                         r|dz  }{t        |      dkD  r|j                  |       |dz  }|t        |      k  r|s|d   j                         g}||fS )a'  
    Break a search query such as "RESVERASOR PLUS Pack 2" into:
      - core_words : distinctive, non-generic lowercase words  -> ["resverasor"]
      - pack_number: the numeric pack count as a string        -> "2"  (or None)

    Used for smart link-scoring on vendor search result pages.
    Nr   r  r   r>   )r  rj   r   isdigitGENERIC_WORDSri   )r   wordspack_number
core_wordsiww_lowers          r7   decompose_search_queryr    s      E!%KJ	A
c%j.!H'')fQU!3a!e8L8L8NA,KFAm#qyy{FAq6A:g&	Q c%j.  Ahnn&'
{""r9   	link_text	link_hrefr  r  c                    ddl }| j                         }|j                         }|dz   |z   }d}|D ]  }	|	|v s|dz  } |r|j                  |      }
d|
z   dz   |
z   dz   |
z   dz   dz   |
z   d	z   dz   |
z   d
z   dz   |
z   dz   dz   |
z   dz   dz   |
z   }|j                  |||j                        r|dz  }|S |j                  d|
z   dz   |      r|dz  }|S )u  
    Score a candidate <a> link for how well it matches the search query.

    Scoring:
      +2  per core word found in link text or href
      +3  if pack_number appears in a pack-related context in the visible TEXT
          (adjacent to "pack", "ud", "x", "oferta", "unidad" etc.)
      +1  if pack_number appears bare in the text (weak — could be anything)
       0  pack_number match in href only — IGNORED entirely, because product
          IDs in URLs (e.g. /2327534-normacid...) contain digits that would
          otherwise cause false positives for every pack number.

    Example for "NORMACID Pack 2" (core_words=["normacid"], pack_number="2"):
      "NORMACID OFERTA 2 UD"             → +2 (core) +3 (pack ctx) = 5  ✅ winner
      "NORMACID CITRUS 32 COMPRIMIDOS"   → +2 (core) +0 (32 ≠ bare 2) = 2
      "Pack 3x2 Normacid"                → +2 (core) +0 (3x2 ≠ pack 2) = 2
      "Pack 6x NORMACID"                 → +2 (core) +0 = 2
    r   Nr~   r>   zpack\s*z|pack\s*x\s*|z\s*x\s*packz\s*udz	\s*unidadz|x\s*z(?!\d)z\s*x\s*(?!\d)z
|oferta\s*r  z(?<!\d)r   )r"  r   escaperA  rB  )r  r  r  r  _re
text_lower
href_lowercombinedscorewordnpat_strs               r7   smart_link_scorer     sw   & "J"Jc!J.HE  8QJE
 JJ{#N !"'(   "" 	 	 &	&
 
 
 ''   **     	 ::gz3>>:QJE
 L	 ZZ
Q2J?QJE Lr9   c                    t         j                  j                  d      }|st        j	                  d       yt        |      \  }}t        j                  |      }t        j                  |xs d      }d| d| d}|rt        |      nd}	|	t        v rUt        |	   j                  t        |            }
t        j                  d	|
        d
dd|
 diddiddiddid|iddigi}n#d}d
ddid|iddid||giddiddiddid|iddig	i}	 t        j                  d| d| dd         || dddt        j                  |      d}d t        |       }t        j                  |d!"      }|j                  d#k(  rPt!        |j"                        dkD  r8t        j                  d$t!        |j"                         d%       |j"                  S |j                  dk(  r	t        j	                  d&       d
dd'ididdid||giddidd(idd'id|idd'ig	i}|| dddt        j                  |      d)}d t        |       }t        j                  |d*"      }|j                  d#k(  rPt!        |j"                        dkD  r8t        j                  d+t!        |j"                         d%       |j"                  S t        j	                  d,|j                   d-|j"                  dd#         yt        j	                  d.|j                   d-|j"                  dd#         y# t$        $ r+}t        j	                  d/t'        |              Y d}~yd}~ww xY w)0a  
    Use ScrapingBee to:
    1. Load vendor homepage (bypasses Cloudflare)
    2. Navigate to the search results page (for VENDOR_SEARCH_URL_TEMPLATES vendors)
       OR interact with the homepage search box (for other vendors)
    3. Click the best-matching product link using smart component scoring
       (core words + pack number), NOT a single first_keyword token
    4. Return the product page HTML for price extraction

    search_query is the FULL search string, e.g. "RESVERASOR PLUS Pack 2"
    SCRAPINGBEE_API_KEYz4[ScrapingBee] SCRAPINGBEE_API_KEY not set, skipping.Nrx   z3
        (function() {
            var coreWords = z;
            var packNum   = a$  ;
            var minScore  = coreWords.length * 2;

            function scoreLink(el) {
                var combined = ((el.innerText || el.textContent || '') + ' ' + (el.href || '')).toLowerCase();
                var score = 0;
                for (var k = 0; k < coreWords.length; k++) {
                    if (combined.indexOf(coreWords[k]) !== -1) score += 2;
                }
                if (packNum && combined.indexOf(packNum) !== -1) score += 2;
                return score;
            }

            var links = document.querySelectorAll('a');
            var bestEl = null, bestScore = 0;
            for (var i = 0; i < links.length; i++) {
                var href = links[i].href || '';
                if (!href || href.indexOf('#') !== -1 || href.indexOf('javascript') !== -1) continue;
                var s = scoreLink(links[i]);
                if (s > bestScore) { bestScore = s; bestEl = links[i]; }
            }
            if (bestEl && bestScore >= minScore) {
                bestEl.click();
            }
        })();
    rt   z![ScrapingBee] Direct search URL: instructionsr  zwindow.location.href = 'z';waiti@  zwindow.scrollBy(0, 300);r  ip  a  input[type='search'], input[name='q'], input[name='search'], input[name='s'], input[name='search_query'], input[name='busqueda'], input[placeholder*='buscar'], input[placeholder*='Buscar'], input[placeholder*='search'], input[placeholder*='Search'], input[placeholder*='Busca']i  r  rP  r  aZ  var inp = document.querySelector("input[type='search'], input[name='q'], input[name='search'], input[name='s'], input[name='search_query']"); if(inp && inp.form) { inp.form.submit(); } else if(inp) { inp.dispatchEvent(new Event('input', {bubbles:true})); inp.dispatchEvent(new KeyboardEvent('keydown', {key:'Enter', keyCode:13, bubbles:true})); }i  z[ScrapingBee] Searching for '' on rK  truefalse)rY   ra   	render_jspremium_proxyblock_resourcesjs_scenarioz$https://app.scrapingbee.com/api/v1/?Z   )r_      u   [ScrapingBee] ✓ Success (rQ  zB[ScrapingBee] 500 on first attempt, retrying with stealth_proxy...i  a#  var inp = document.querySelector("input[type='search'], input[name='q'], input[name='search'], input[name='s'], input[name='search_query']"); if(inp && inp.form) { inp.form.submit(); } else if(inp) { inp.dispatchEvent(new KeyboardEvent('keydown', {key:'Enter', keyCode:13, bubbles:true})); })rY   ra   r(  stealth_proxyr*  r+  x   u!   [ScrapingBee] ✓ Retry success (z![ScrapingBee] Retry also failed: r   z[ScrapingBee] Got status z"[ScrapingBee] js_scenario failed: )rH   rI   rJ   rc   re   r  r^   dumpsr  VENDOR_SEARCH_URL_TEMPLATESformatr    rd   r
   rf   r:  rj   r   rk   rl   )ra   r   r  rY   r  r  core_words_jspack_number_jssmart_click_js
vendor_keydirect_search_urlr+  search_selectorr   full_urlrn   js_scenario_retryparams_retryfull_url_retryretry_responserq   s                        r7   #scrape_with_scrapingbee_js_scenarior>    s    jjnn23GMN 5\BJJJz*MZZ 1r2N*O ,+, -N< IW4^D\^J007
CJJQ[\hQiJj78I7JKL78I7J"MN78^,	
* 	 /*/<89  |  }^,

53L>s3BxjQR#&::k2
 :)F:K9LM<<"53&3x}}+=+CKK5c(--6H5IQR== 3&NN_aTNo.TNo|<=TN  "I  JTN0TN
!! ##!'#*#zz*;<L  DIlD[C\]N%\\.#FN))S0S9L9L5MPS5S?NDWDW@X?YY`ab%***NN>~?Y?Y>ZZ\]k]p]pquru]v\wxy283G3G2H8==Y]Z]K^J_`a ;CF8DEs,   B=L C%L 12L $2L 	M !MM	min_delayc                 Z    t        j                  | |      }t        j                  |       y)z0Simulate human-like delay with random variation.Nr1   )r?  r<   r6   s      r7   r8   r8     s    NN9i0EJJur9   	min_pause	max_pausec                 N   	 | j                  d      }| j                  d      }t        t        j                  dd            D ]Y  }t        j                  |dz  |      }| j                  d| d       t	        j
                  t        j                  ||             [ y#  Y yxY w)	z'Simulate human-like scrolling behavior.z!return document.body.scrollHeightzreturn window.innerHeightr   r  r>   zwindow.scrollBy(0, z);N)r\  r[  r2   randintr4   r5   r3   )r  rA  rB  total_heightviewport_heightr  scroll_amounts          r7   human_scrollrH    s    	,,-PQ //0KLv~~a+, 	=A"NN?a+?QM!!$7b"IJJJv~~i;<	=s   BB   B$c                 T   	 t        |       }t        t        j                  dd            D ]c  }t        j                  dd      }t        j                  dd      }|j	                  ||      j                  t        j                  dd             e |j                          y#  Y yxY w)	z#Simulate human-like mouse movement.r>   rX   r'  r  i   r?   rX  N)r   r[  r2   rD  move_by_offsetpauser3   perform)r  actionsr  xys        r7   human_mouse_movementrP    s    v&v~~a+, 	IAsD)AsC(A""1a(..v~~c3/GH	I 	s   B B# #B'c                 6    	 | j                  d       y#  Y yxY w)z.Inject JavaScript to hide webdriver detection.zz
            Object.defineProperty(navigator, 'webdriver', {
                get: () => undefined
            });
        Nr\  )r  s    r7   inject_anti_detection_jsrS    s$      	
s    rM  	use_proxyc                 H   t               st        d      t        dd      st        d      t                t	               }| rw|j                  d       |j                  d       |j                  d       |j                  d	       |j                  d
       |j                  d       |j                  d       |rFt               }|r:t        |      }|j                  d|        t        j                  d|dd         |j                  d       |j                  d       |j                  d       |j                  d       |j                  d       |j                  d       |j                  d       |j                  d       |j                  d       |j                  d       |j                  d       |j                  d       |j                  d       |j                  d       |j                  d       |j                  d        |j                  d!       |j                  d"       |j                  d#       |j                  d$       |j                  d%       |j                  d&       |j                  d'       |j                  d(       |j                  d)       |j                  d*       |j                  d+       |j                  d,       |j                  d-       |j                  d.       |j                  d/       |j                  d0       |j                  d1       |j                  d2       |j                  d3       |j                  d4       |j                  d5       |j                  d6       |j                  d7       |j                  d8       |j                  d9       |j                  d:       |j                  d;       |j                  d<       |j                  d=       |j                  d>       |j                  d?       |j                  d@       |j                  dA       |j                  dB       |j                  dC       |j                  dD       |j                  dE       |j                  dF       |j                  dG       |j                  dH       |j                  dI       |j                  dJ       |j                  dK       |j                  dL       |j                  dM       |j                  dN       |j                  dO       |j                  dP       |j                  dQ       |j                  dR       |j                  dS       t        j                  t              }t        j                  dT|ddU  dV       |j                  dW|        |j                  dXdYg       |j                  dZd[       d\dl}d\dl}t#        |j%                               dd] }t'        j(                         }|d^k(  r/t*        j,                  j/                  |j1                         d_      }	n(d}	d`t*        j2                  da<   dbt*        j2                  dc<   t*        j,                  j/                  |	dd|       }
	 t+        j4                  |
def       t+        j4                  t*        j,                  j/                  |	dg      def       t+        j4                  t*        j,                  j/                  |	dh      def       |j                  dj|
        |j                  dkt*        j,                  j/                  |	dl              |j                  dmt*        j,                  j/                  |	dh              |j                  dnt*        j,                  j/                  |	dg              |d^k7  r"|j                  do       |j                  dp       t*        j2                  j;                  dq      xs t*        j2                  j;                  dr      }|rt        j                  ds|        |s=dtD ]8  }t=        j>                  |      }|s|}t        j                  du| dv|         n |st'        j(                         }t        j                  dw| dV       |d^k(  rdxdyg}n|dzk(  rg d{}n|d|k(  rd}d~g}ng }|D ]=  }t*        j,                  jA                  |      s#|}t        j                  d|         n |st        j9                  d       |rt        j                  d|        	 t*        j,                  jA                  |      rt        j                  d|        tC        jD                  |dgdeded      }|jF                  d\k(  r1t        j                  d|jH                  jK                                 n=t        j9                  d|jL                          nt        jO                  d|        d}|r ||_)        t        j                  d|        nt        j9                  d       	 t        j                  d       tU               jW                         }t        j                  d|        tY        |      }t        j                  d       t[        j\                  ||      }t        j                  d       |
|_/        |S # t6        $ r,}t        j9                  dit#        |              Y d}~d}~ww xY w# tB        jP                  $ r t        j9                  d       Y 5t6        $ r,}t        j9                  dt#        |              Y d}~dd}~ww xY w# t6        $ r}t        jO                  dt#        |       de       	 dta               v rNt*        j,                  jA                  |
      r/t=        jb                  |
de       t        j                  d|
         # t6        $ r Y  w xY wd}~ww xY w)zGInitialize and configure Chrome WebDriver with anti-detection measures.zOChrome dependencies check failed. DevOps must install Chrome via setup scripts./tmpr   )ry  z;Insufficient disk space in /tmp. DevOps must free up space.z--headless=newz--window-size=1920,1080z--start-maximizedz--disable-gpuz--no-sandboxz--disable-setuid-sandboxz--disable-dev-shm-usagez--proxy-server=z[Chrome Init] Using proxy: Nr   z---disable-blink-features=AutomationControlledz--disable-software-rasterizerz--incognitoz--disable-gpu-sandboxz--disable-extensionsz--disable-pluginsz--disable-plugins-discoveryz--disable-print-previewz4--disable-component-extensions-with-background-pagesz--no-default-browser-checkz--disable-background-networkingz--disable-syncz--disable-translatez--hide-scrollbarsz--metrics-recording-onlyz--mute-audioz--no-first-runz"--safebrowsing-disable-auto-updatez--disable-accelerated-2d-canvasz--no-zygotez%--disable-background-timer-throttlingz(--disable-backgrounding-occluded-windowsz --disable-renderer-backgroundingz>--disable-features=TranslateUI,IsolateOrigins,site-per-processz!--disable-ipc-flooding-protectionz--disable-default-appsz--password-store=basicz--use-mock-keychainz--disable-web-securityz --allow-running-insecure-contentz--disable-webglz--disable-threaded-animationz--disable-threaded-scrollingz!--disable-in-process-stack-tracesz--disable-histogram-customizerz--disable-gl-extensionsz!--disable-composited-antialiasingz--disable-canvas-aaz--disable-3d-apisz"--disable-accelerated-video-decodez#--disable-background-media-downloadz--disable-domain-reliabilityz(--disable-client-side-phishing-detectionz--disable-component-updatez--disable-hang-monitorz--disable-prompt-on-repostz3--force-fieldtrials=SiteIsolationExtensions/Controlz--disable-back-forward-cachez--disable-popup-blockingz --disable-session-crashed-bubblez--disable-infobarsz--disable-breakpadz--disable-crash-reporterz--disable-default-tracingz--disable-media-sessionz--no-service-autorunz--disable-audioz@--disable-features=VizDisplayCompositor,AudioServiceOutOfProcessz--disable-preconnectz--disable-client-hintsz--crash-dumps-dir=/tmpz--data-path=/tmp/chrome-dataz"--disk-cache-dir=/tmp/chrome-cachez--disable-loggingz--disable-logging-redirectz--log-level=3z--single-processz [Chrome Init] Using user agent: 2   r  z--user-agent=excludeSwitcheszenable-automationuseAutomationExtensionFr      Windowsr  z:99DISPLAYz	/dev/nullDBUS_SESSION_BUS_ADDRESSzchrome-session-T)exist_okzchrome-cachezchrome-dataz*Could not create Chrome temp directories: z--user-data-dir=z--crash-dumps-dir=zchrome-dumpsz--data-path=z--disk-cache-dir=z--remote-debugging-port=9222z"--remote-debugging-address=0.0.0.0
CHROME_BINGOOGLE_CHROME_BINz3[Chrome Detection] Found via environment variable: )r   zchromium-browserr  r  z.[Chrome Detection] Found in PATH using 'which r  z-[Chrome Detection] Checking common paths for z5C:\Program Files\Google\Chrome\Application\chrome.exez;C:\Program Files (x86)\Google\Chrome\Application\chrome.exer   )r   r   z/usr/bin/chromium-browserr   z/opt/google/chrome/chromez/snap/bin/chromiumDarwinz</Applications/Google Chrome.app/Contents/MacOS/Google Chromez2/Applications/Chromium.app/Contents/MacOS/Chromiumz"[Chrome Detection] Found at path: z3[Chrome Detection] Chrome not found in common pathsz+[Chrome Verification] Verifying Chrome at: u)   [Chrome Verification] ✓ File exists at r   rX   r   u0   [Chrome Verification] ✓ Chrome is executable: u1   [Chrome Verification] ⚠ Chrome returned error: u*   [Chrome Verification] ✗ File not found: u4   [Chrome Verification] ⚠ Chrome --version timed outu3   [Chrome Verification] ⚠ Could not verify Chrome: z#[Chrome Init] Using Chrome binary: zL[Chrome Init] Chrome binary not found. Selenium will attempt auto-detection.zK[Chrome Init] Installing/verifying ChromeDriver with ChromeDriverManager...z%[Chrome Init] ChromeDriver ready at: z,[Chrome Init] Creating Selenium WebDriver...)serviceoptionsu4   [Chrome Init] ✓ WebDriver initialized successfullyu2   [Chrome Init] ✗ Failed to initialize WebDriver: r   r  r  z)[Chrome Cleanup] Removed temp directory: )2r  RuntimeErrorr  r  r   add_argumentrO   rT   rc   rd   r2   rL   r9  r   add_experimental_optionuuidtempfilerl   r   r   r   rH   r   r   
gettempdirrI   makedirsrk   re   rJ   r  r   r   r   r   r   r   r   r   r   r   binary_locationr   installr   r   Chromer
  localsr  )rM  rT  rc  	proxy_urlr  rg  rh  unique_session_idr   tmp_baser  rq   r  r  foundpossible_pathsr   rp   chromedriver_pathrb  r  s                        r7   rZ  rZ    s
    &'lmmF1-XYY &'iG-.6701_-^,7867 "$	.y9I  ?9+!>?KK5in5EFG HI89'01/0,-6723OP56:;)*./,-34()*=>:;'@ACD;<YZ<=1212./12;<*+7878<=9:23<=./,-=>>?78CD561256NO7834;<-. -.344523/0*+[\/012 1278=>,-56) +, 12J
LL3JsO3DCHI=56##$58K7LM##$<eD4 DJJL)"1- __F77<< 3 3 5x@ %

91<

-.GGLL_=N<O+PQMN
MD1
BGGLL>:TJ
BGGLL=9DI +M?;<-bggll8^.T-UVW<X}(M'NOP,RWW\\(N-S,TUV ;<AB **...U"**..AT2UKI+WX N 	CLL%E#LSEQTU`Tabc	 "DVHCPQYHNN wN xNDN
  N" 	Dww~~d#"A+OP		 NNPQ B;-PQ	[ww~~k*HVW#k(BSW^blmn$$)LL#STZTaTaTgTgTiSj!klNN%VW]WdWdVe#fgI+WX" "-:;-HIefbc/199;<=N<OPQ+,CD!!'7CJK -O  NCCF8LMMN` (( 	SNNQR 	[NNPQTUVQWPXYZZ	[.  	I#a&R]ab	&(*rww~~m/Lm4@GWX 	  			sv   Bk+ 
Cl# Bn +	l 4!ll #(nn!m==n	p!#p2App	ppppp!c                 ^   	 t        | d      ri| j                  r\| j                  }t        j                  j	                  |      r0t        j                  |d       t        j                  d|        yyyy# t        $ r+}t        j                  dt        |              Y d}~yd}~ww xY wr	  )r  r
  rH   r   r   r  r  rc   rd   rk   re   rl   r  s      r7   r  r    s    H6+,1F1F"11Mww~~m,m4@J=/Z[ - 2G,
  H=c!fXFGGHr  c                 z   g d}|D ]`  }	 | j                  t        j                  |      }|D ]8  }|j                         s|j	                          t        j                  d       : b 	 | j                  j                  j                  t        j                         y # t        $ r Y w xY w# t        $ r Y y w xY w)N)zbutton.cookie-acceptzbutton#cookie-acceptzbutton.acceptzbutton.btn-acceptzbutton[aria-label='Close']zbutton.closez.modal button.closez.popup button.closez.cookie-consent buttonzdiv#cookie-consent buttonzbutton[title='Close']333333?)find_elementsr   CSS_SELECTORis_displayedr  r4   r5   rk   	switch_toactive_element	send_keysr   ESCAPE)r  	selectorsr  elemsr  s        r7   dismiss_overlaysr    s    I  	((#>E $??$HHJJJsO$''11$++>  		  s)   5B'B+3B. 	B+*B+.	B:9B:c                    g d}|D ]p  }	 | j                  t        j                  |      }|D ]H  }|j                         s|j	                          t        dd       t        j                  d|         n r g d}t        j                  d       |D ]  \  }}	}
	 |dk(  r?t        | d      j                  t        j                  t        j                  |	f            }n>t        | d      j                  t        j                  t        j                  |	f            }d	}|D ]'  }|j                         s|j                         s%|} n |st        j                  d
|
        	 |j	                          t        dd       	 |j!                          |j#                  |       t        dd       t        j                  d| d       d}	 |j#                  t$        j&                         d}|s	 |j)                          d}|ssg d}|D ]j  \  }}	 | j                  t        j                  |      }|rB|d   j                         r/|d   j	                          t        j                  d|        d} nl |r#t        dd       t        j                  d        yt        j+                  d       " t        j                  d
        y# t        $ r Y w xY w#  Y `xY w#  Y WxY w# t        $ r}Y d	}~d	}~ww xY w# t        $ r}Y d	}~d	}~ww xY w# t        $ r Y w xY w# t        $ r}Y d	}~d	}~ww xY w)z7Find and interact with search input on vendor websites.)zbutton[aria-label*='search' i]zbutton[aria-label*='buscar' i]za[aria-label*='search' i]za[aria-label*='buscar' i]z.search-togglez.search-iconz.icon-searchz[class*='search-toggle']z[class*='search-icon']z[class*='searchToggle']zbutton.searchzspan.searchzbutton svg[class*='search' i]za svg[class*='search' i]rX  r  z [Search] Clicked search toggle: ))xpathz//input[@type='search']zsearch input type)r  zs//input[@placeholder[contains(translate(., 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz'), 'search')]]zsearch placeholder XPath)r  zN//input[@name='search' or @name='q' or @name='keyword' or @name='searchInput']znamed search input)csszinput[type='search']zCSS search input)r  z0[placeholder*='search'], [placeholder*='Search']zCSS search placeholder)r  z:input[placeholder*='buscar'], input[placeholder*='Buscar']zSpanish buscar)r  z//form//input[1]zfirst form input)r  z&//input[@class[contains(., 'search')]]zsearch class input)r  z/input.search, input.search-box, input.searchboxzcommon search classes)r  zinput[name='search_query']zsearch_query name)r  z#search_query_topzsearch_query_top id)r  zinput.search_queryzsearch_query class)r  zform input[type='text']zgeneric form text inputz+[Search] Attempting to locate search bar...r  r  Nu%   [Search] ✓ Search bar FOUND using: rw  g333333?皙?g333333?z[Search] Typed query: 'r   FT))z//button[contains(., 'Search')]zSearch button text)z//button[contains(., 'Buscar')]zBuscar button text)//button[@type='submit']zsubmit button)//input[@type='submit']zsubmit input)$//button[contains(@class, 'search')]zsearch class buttonr   z[Search] Submitted with r>   u*   [Search] ✓ Search SUBMITTED successfullyu$   [Search] ✗ FAILED to submit searchz[Search] Selector failed: )rx  r   ry  rz  r  r8   rc   r   rk   rd   r   untilEC presence_of_all_elements_locatedXPATH
is_enabledclearr}  r   RETURNsubmitre   )r  r   rz   search_toggle_selectors
toggle_seltogglestogglesearch_selectorsselector_typeselectorr|   elementsr  r  submit_successrq   button_selectorsbtn_selectorbtn_descbuttonss                       r7   find_and_searchr    s3   " . 

		**2??JGG! &&(LLNS)LL#CJ<!PQ
  KK=>0@ Y,xX	'(3997788LM )39977(8ST
 G ??$ G
 KK?}MNC%

 l+S!KK1,qAB #N!!$++.!%
 "NN$%)N
 "$  /? 	!*L(!"("6"6rxx"N"wqz'>'>'@#AJ,,."KK*B8*(MN-1N!	! Aq!HIEFmYv LL-k];<a  		^
   ! ( % ! !  		s   5K7KBL/8L/	L/L/*K!K)8L/!K11L/4LL/A"L7'L/ L/	KK!K&#L/)K.+L/1	L:L/ LL/	LL/LL/	L,(L/+L,,L//	M>Mr]  c                    t        | d      }d}|j                  dd      }|D ]P  }	 |j                  st        j                  |j                        }fd |      }|rd|cxk  rdk  rn n|c S R |sR|j                  d	d
i      }|r<|j                  d      xs |j                  }t        |      }|rd|cxk  rdk  r|}|S  |s|j                  d      }	t        j                  d|	      }
g }|
D ]0  }t        |      }|sd|cxk  rdk  sn  |j                  |       2 |r0t        t        t!        |                  }|t#        |      dz     }|S |S # t
        $ r Y Gw xY w)z2Extract price from HTML using multiple strategies.r  Nscriptzapplication/ld+json)r?  c                    t        | t              rw| j                  d      dk(  rd| v r	 t        | d         S d| v r(| d   }t        |t              rd|v r	 t        |d         S | j                         D ]  } |      }|s|c S  y t        | t              r| D ]  } |      }|s|c S  y # t        t
        f$ r Y w xY w# t        t
        f$ r Y pw xY w)Nz@typeOfferr   offers)
isinstancedictrJ   r   r   r   valueslist)objr  valuerp   item
find_prices        r7   r  z+extract_price_from_html.<locals>.find_price  s    c4(www'72w#~!#(W#66  3!$X%fd368I%',VG_'= = "% *!+E!2!#)M*   T* # *!+D!1!#)M* % !+I6 ! ! %/	#: % $%s#   B/ C /C CCCr  rY  r  r   r  r  r~   z\d+[.,]\d{2}r>   )r   r  stringr^   loadsrk   findrJ   r   r%  r  r"  r`  ri   rb  r  ra  rj   )r]  r  r   scriptsr  ro   	price_tagr  r  r   r  rg  mr  s                @r7   r_  r_    s   m4DM mmH+@mAG $#	==::fmm,D2 't,M!?!?$$C$N IIZ$9I:	mmI.@)..G(1JcZ969 *$$ :
 }}S!**_d3 	*A(+JcZ969j)	*
 DV-.F"3v;!#34M  ;  		s   E =E  	E-,E-max_retriesc                 z   t        |      D ]^  }	 t        j                  dft        j                  dft        j                  dft        j                  dft        j                  dft        j                  dft        j                  dft        j                  dft        j                  d	ft        j                  d
ft        j                  dfg}d}|D ]S  \  }}	 | j                  ||      }|D ]2  }		 |	j                         r|	}t        j                  d| d|         n4 |r nU |s9t        j                  d       t        j                  dt        |       d        y	 |j                          t        dd       	 |j                          |j                  |       t        j                  d|        t        dd       d}
	 |j                  t         j"                         t        j                  d       d}
|
sF	 |j                  t         j$                  t         j"                         t        j                  d       d}
|
spg d}|D ]g  }	 | j                  t        j                  |      }|rB|d   j                         r/|d   j                          t        j                  d|        d}
 ni |
s(	 |j)                          t        j                  d       d}
|
sL	 |j+                  t        j                  d       }|r)| j-                  d!|       t        j                  d"       d}
|
s,t        j                  d#       ||d$z
  k  rt/        |       Q yt        d%d&        y y#  Y xY w#  Y xY w#  Y xY w#  Y xY w#  t        j                  d       Y xY w#  Y jxY w# t&        $ r Y qw xY w# t&        $ r Y w xY w# t&        $ r Y w xY w# t&        $ rO}t        j1                  d'|d$z    d(| d)t3        |              ||d$z
  k  rt/        |       nY d}~ yY d}~)d}~ww xY w)*z:Search on Google with retry logic and exponential backoff.r   ztextarea[name='q']zinput[name='q']z//textarea[@name='q']z//input[@name='q']z[aria-label='Search']zinput[aria-label='Search']z//*[@aria-label='Search']z[role='searchbox']z$//*[@role='combobox' and @aria-owns]zbody input[type='text']Nz#[Discovery] Found search box using r   z>[Discovery] Could not find Google search box with any selectorz[Discovery] Attempted z selector combinationsFrw  gffffff?z[Discovery] Typed query: r  rR  z[Discovery] Pressed EnterTz6[Discovery] Enter key failed, trying alternate methodsz[Discovery] Pressed Ctrl+Enter)z%//button[@aria-label='Google Search']z%//button[@aria-label='google search']z)//button[contains(@aria-label, 'Search')]z$//button[contains(text(), 'Search')]z2//input[@type='submit' and @value='Google Search']r  r  r  z@//button[contains(@class, 'btn') and contains(@class, 'search')]//button[@jsaction]z'//button[contains(@data-ved, '0ahUKE')]z///div[@role='button' and contains(., 'Search')]z1//span[contains(text(), 'Search')]/parent::buttonz1//span[contains(text(), 'Buscar')]/parent::buttonr   z2[Discovery] Clicked search button using selector: z[Discovery] Submitted formzancestor::formzarguments[0].submit();z)[Discovery] Submitted form via JavaScriptz4[Discovery] Could not submit search using any methodr   r  rX   z0[Discovery] Error during Google search (attempt r  ): )r[  r   NAMEry  r  rx  rz  rc   rd   re   r   rj   r  r8   r  r}  r   r  CONTROLrk   r  find_elementr\  rC   r   rl   )r  rU   r  r:   r  r  r  selector_valuer  r  r  r  r  r  formrq   s                   r7   search_googler    s5   % QP	 #"67"3423/0"9:">?67"67AB";< " J1A -~%33M>RH#+ %%&335-4
 &.QR_Q``bcqbr,s t %  6% " "  _`5c:J6K5LLbcd  "C%
  "
   'KK3E7;<S! #NW$$T[[178!%
 "((t{{CKK @A%)N
 "$   %5 	!L!"("6"6rxx"N"wqz'>'>'@#AJ,,."KK*\]i\j(kl-1N!	! "%%'KK <=%)N
 "%22288=MND--.FM$OP)- "UV[1_,-g61UQf k%$
WUV8 % ! ! !  !   	LLKGVWK=XYZeYffijmnojpiqrsq()'2 3	s  CO"M71-M/M7$=O"$M?N7O"	6N?O"AN,O"A"N45O"<'O#O"&AO1*O"O"/M4	1M77M<9O"?NO"N	O"N)&O",N1.O"4	O=O" OO"	OO"OO"	OO"OO""	P:+>P55P:registered_domainsc                 L   g }|  d|dd  d}t         j                  d|        t         j                  d|        t         j                  d       d}	 t        d	      }d
dg}|D ]  }	 t         j                  d| d       |j                  |       t	        dd       d|j
                  v st        |j                        dkD  rUt         j                  d|        	 |j                  d       t	        dd       |j                  d       t	        dd        n 	 g d}
|
D ]~  }	 |j                  t        j                  |      }|D ]V  }	 |j                         rC|j!                         r3|j#                          t         j%                  d       t	        dd        nX  	 t         j                  d|        t'        ||d      }|j(                  j+                         |j                  j+                         g d}t-        fd|D              }|rt         j                  d        d}|rI|sF	 |j                  }t/        |d!      }g }|j1                  d"d#$      }t         j                  d%t        |       d&       |D ]  }|j                  d'd(      j3                  d)      s'd*j+                         vs:t-        fd+d,D              rO	 t5              j6                  j9                  d-d(      }|r-||vr)|j;                         t         j%                  d.|         t=        t?        |            dd0 }t         j                  d1t        |       d2       tA        |d3      D ]  \  }}	 t         j                  d4| d5t        |       d6|dd7         |j                  |       	 tC        |d8      jE                  d9        t	        d:d;       |j                  tG              }|r\tI        |      }tK        ||      \  }}tM        ||      }|j;                  d|||||||d<       t         j                  d=| d>|        nt         j%                  d?|        t	        ddA        	 |r	 tQ        |       |jS                          |rr{t         j                  dE       tU        |      }|rCd}	 t        d	      }tA        |d3      D ]  \  }}	 t         j                  dF| d5t        |       d6|dd7         |j                  |       	 tC        |d8      jE                  dG        t	        d:d;       |j                  tG              }|r\tI        |      }tK        ||      \  }}tM        ||      }|j;                  d|||||||d<       t         j                  d=| dH|        nt         j%                  d?|        t	        ddA        	 |r2	 tQ        |       |jS                          nt         j                  dK       t         j                  dLt        |       dM       |S # t        $ r Y 3w xY w# t        $ r/}	t         j                  d| dt        |	              Y d}	~	d}	~	ww xY w#  Y FxY w#  Y txY w# t        $ r,}	t         j%                  dt        |	              Y d}	~	%d}	~	ww xY w# t        $ r,}	t         j%                  d/t        |	              Y d}	~	yd}	~	ww xY w#  Y ;xY w# t        $ r/}	t         j                  d@| dt        |	              Y d}	~	d}	~	ww xY w# t        $ r,}	t         jO                  dBt        |	              Y d}	~	 d}	~	ww xY w# t        $ r.}	t         jO                  dCt        |	       d#D       Y d}	~	:d}	~	ww xY w#  Y )xY w# |r$	 tQ        |       |jS                          w #  Y w xY ww xY w#  Y xY w# t        $ r/}	t         j                  dI| dt        |	              Y d}	~	Vd}	~	ww xY w# t        $ r,}	t         j                  dJt        |	              Y d}	~	~d}	~	ww xY w#  Y WxY w# |r$	 tQ        |       |jS                          w #  Y w xY ww xY w)Nz
    Discover alternative vendors via search (Google/Bing) and scrape their prices.
    Uses multiple fallback strategies when Google blocks the request.
    r~   Nr   z precio comprar onlinez([Discovery] Starting search with query: z+[Discovery] Registered domains to exclude: z3[Discovery] Strategy 1: Attempting Google Search...F)rT  zhttps://www.google.eszhttps://www.google.comz[Discovery] Loading r  r>   g      @Googler  u   [Discovery] ✓ Loaded z+window.scrollTo(0, window.innerHeight / 4);rX  r  window.scrollTo(0, 0);z[Discovery] Could not load r   )z //button[contains(., 'Aceptar')]z//button[contains(., 'Accept')]z//button[contains(., 'all')]r  z$[Discovery] Dismissed cookie consentz&[Discovery] Cookie dismissal skipped: z*[Discovery] Attempting Google search for: )r  )
sorry	recaptcharobot	automatedblockedzunusual trafficverifycaptchapuzzle	challengec              3   2   K   | ]  }|v xs |v   y wr   r  )r   	indicatorcurrent_urlr]  s     r7   r   z/discover_alternative_vendors.<locals>.<genexpr>  s$     vV_Y+5Qk9QQvs   u3   [Discovery] ⚠ CAPTCHA or block detected on Googler  aThrefz[Discovery] Found z total links on pager  rx   httpgooglec              3   &   K   | ]  }|v  
 y wr   r  )r   rN  r  s     r7   r   z/discover_alternative_vendors.<locals>.<genexpr>  s!       \Ojk\]ae\e  \O   )z/ads/z/aclkwebcacher  z[Discovery] Found link: z [Discovery] Error parsing link: rX   z[Discovery] Will scrape z  unique vendor links from Googler   z[Discovery r  z] Scraping: r'  r]   c                 *    | j                  d      dk(  S Nzreturn document.readyStatecompleterR  ds    r7   <lambda>z.discover_alternative_vendors.<locals>.<lambda>      !*:*:;W*X\f*f r9   rR        @)	vendor_idrz   r   r   r   r   price_differencepercentage_differenceu   [Discovery] ✓ Price z
 found at z[Discovery] No price found at z[Discovery] Error scraping r  z*[Discovery] Error parsing Google results: z#[Discovery] Google strategy error: r   zN[Discovery] Strategy 2: Google failed/blocked, attempting Tavily API Search...z[Discovery-Tavily c                 *    | j                  d      dk(  S r  rR  r  s    r7   r  z.discover_alternative_vendors.<locals>.<lambda>-	  r  r9   z found via Tavily at z"[Discovery] Tavily error scraping z#[Discovery] Tavily scraping error: z*[Discovery] Tavily API returned no resultsu0   [Discovery] ✓ Completed all strategies. Found  alternative vendors)+rc   rd   rZ  rJ   r8   rw   rj   r]  r\  rk   re   rl   rx  r   r  rz  r  r  r   r  r  r   anyr   r  rS   r	   r  r   ri   r  ra  r   r   r  r_  r  r-  r0  r   r  r^  rr   ) r   r   r   r  discovered_vendorsr   r  google_urls
google_urlrq   dismiss_buttonsr  r  btngoogle_successcaptcha_indicatorsgoogle_blockedr  r  result_links	all_linksr   r  r   
disc_pricediff	perc_diffr   	api_linksr  r  r]  s                                 @@@r7   discover_alternative_vendorsr  s  s   
 YaSb 122HIL
KK:<.IJ
KK=>P=QRS KKEFF[+e< $$

 & 	J2:,cBC

:&As# v||+s63E3E/F/MKK"9* FG--.[\#C---.FG#C-  0N	.	LO , $22288XFG& !!"//1cnn6F #		 &-S T +C 5 %!$ 	@OP&v|K ((..0((..0

 vcuvvNNPQ"N .DT))$T=9  " MM#DM9	0Y0@@TUV% 
VD88FB/Dv.84::<3OX[  \O  pN  \O  YOV%-d^%:%:%B%B62%NF%&8J*J , 3 3D 9 &/Gx-P Q
V  $C$56r:6s<7H6IIijk "+<!; '&IC$Wk#aL8I7J,W[\`]`WaVb$cd

4(!)&"5;; f $C-&,&8&8%<[%I
%%8%>F.Hj.YOD)%@j%QF.55-1/5.21;'**0489B	7 	 #KK*@JW]V^(_`"LL+I$)PQ
  1%O'&^ &v.
 de%l3	F33eD!*9a!8 '&IC$^&8Qs9~>Nl[_`dad[eZf$gh

4(!)&"5;; f $C-&,&8&8%<[%I
%%8%>F.Hj.YOD)%@j%QF.55-1/5.21;'**0489B	7 	 #KK*@Labhai(jk"LL+I$)PQ
  1%O'&X .v6 NNGH
KKB3GYCZB[[opqI %   !<ZL3q6(ST,!  	LLLA#a&JKK	LT  ) V"LL+KCPQF8)TUUV"! 4 % W)DTF"SQRVH'UVVW
  TI#a&RSST  T:3q6(CdSST &v. 2! 4 % ^)KD6QSTWXYTZS['\]]^
  O!DSVHMNNO .v6 s0  ^ /A4Y$:Y^ #	Z  -%ZAZ$Z(Z  *B^ >A7] 6] 	] A[3A] ;\>\B\3] ^ _ `; ,;` (_8B` `; ,a; 0a3 	YYYY	Z$Z^ Z^ ZZZZ   	[)![
^ [^ 	\!!\] \] \\	]!$]] ]] 	^!^=^ ^^ 	_#^=7_ =__ _
_5_-,_5-_1/_58_=:`  	`8	$`3-`; 3`88`; ;	a0!a+%a; +a00a; 3a8;b#?bb#bb#c                 
   g }t         j                  dd        t         j                  d       t         j                  d|  d|        t         j                  d|        t         j                  dt        |              t         j                  d d       	 t        || |      }t         j                  d	t        |       d
       |rnt         j                  d       t	        |dd d      D ]G  \  }}|j                  dd      }|j                  dd      }	t         j                  d| d| d|	        I |st         j                  d       |S t         j                  dt        |       d       t	        |d      D ]  \  }}	 |j                  dd      }
|j                  dd      }|j                  dd      }|j                  d      }|j                  dd      }|j                  dd      }|
st         j                  d | d!| d"       ||v rt         j                  d#| d$| d%       d}d&}|D ]H  }|j                         |j                         v s!|j                         |j                         v sD|}d'} n |rt         j                  d#| d(| d)       t         j                  d*| d+       t         j                  d,|j                          d-|j                          d.|j                          d-|j                          d/	       t         j                  d0| d(| d1       t         j                  d2t        |       d3|        ||
||||||t        j                         d4	}|rT||z
  }||d5<   |d6kD  r||z  d7z  nd6|d8<   t         j                  d9| d$| d:|
dd;  d<|d=d> |j                          
       n3t         j                  d9| d$| d:|
dd;  d? |j                                 |j                  |        t         j                  dAt        |       dB       t        |      t        |      z
  }t         j                  dCd        t         j                  dD       t         j                  dEt        |              t         j                  dFt        |              t         j                  dG|        t         j                  dHd d       |S # t        $ r/}t         j                  d@| d$t        |              Y d}~d}~ww xY w# t        $ r,}t         j                  dIt        |              Y d}~|S d}~ww xY w)Ja  
    Discover alternative vendors using SERP API Google Shopping Light Engine.
    Queries SERP API for shopping results and extracts vendor information with prices.
    Sequence: First vendors scraped, then SERP API used for discovery.
    
    Args:
        barcode: Product barcode
        product_name: Product name
        msp: Minimum Selling Price
        registered_domains: Set of already registered vendor domains to exclude
    
    Returns:
        List of discovered vendor dictionaries with price information
    r   r   u-   [Discovery SERP] 🔍 STARTING SERP DISCOVERYz[Discovery SERP] Product: r   z0[Discovery SERP] Registered domains to exclude: z/[Discovery SERP] Number of registered domains: )r   r   r   u0   [Discovery SERP] ✓ search_serp_api() returned z total results to processz/[Discovery SERP] Sample vendor names from SERP:Nr  r   rz   r   r   r   z  [z] r   z-[Discovery SERP] SERP API returned no resultsz[Discovery SERP] Processing z SERP API results
r   rx   r   ry   r   r   r   z![Discovery SERP] Skipping result r  z	): no URLr   r   z' - FILTERED (vendor already registered)FTz: 'z' - FILTEREDz;           Reason: Substring match with registered domain 'r   z           Logic: 'z' in 'z' OR 'z' = TRUEr   z' PASSESz$           Reason: No match against z registered domains: )	rz   r   r   r   r   source_typer   r   violation_dater  r   r'  r  z[Discovery SERP] Result z - URL: r   u    - Price: ₹r   r   z - Price: N/A - Status: z)[Discovery SERP] Error processing result u    [Discovery SERP] ✓ Discovered z! alternative vendors via SERP APIr   z#[Discovery SERP] FILTERING SUMMARY:z0[Discovery SERP]   Total SERP results received: u5   [Discovery SERP]   ✓ Passed filters (new vendors): u6   [Discovery SERP]   ⊘ Filtered (registered vendors): r   z3[Discovery SERP] Critical error in SERP discovery: )rc   rd   rj   r   r   rJ   re   r   r   r   nowr   ri   rk   rl   r   )r   r   r   r  r  r`   r   rp   vendorr   r   rz   r   r   r  r   matching_domainis_registeredr  discovery_dict
price_diffrq   filtered_counts                          r7   !discover_alternative_vendors_serpr  _	  s     KK"VH
KK?A
KK,WISGH
KKBCUBVWX
KKA#FXBYAZ[\
KK6("mU "|WRUVFs7|nTmnoKKIK(!a8 DVM9=

?E:c#b
5'BCD
 NNJK%%23w<.@STU %Wa0 K	KCJ#ZZb9
$jj	B &

?B ? &

? ;$jj3LMHi8!LL#DSEK=Xa!bc "44KK">se2k]Ry z{ #' %0 F"((*flln<R]RcRcRe@e*0(,	 !KK">se3{mS_ `aKK"]^m]nno pqKK"5k6G6G6I5J&Q`QfQfQhPiiop  qF  qF  qH  pI  IO  P[  Pa  Pa  Pc  Od  dl  !m  nKK">se3{mS[ \]KK"FsK]G^F__t  vH  uI  !J  K $/",%2%2#.$)/&.lln
" !!$}!4J9CN#56Z]`aZazC?ORU?UghN#:;KK23%r+ G"",Sb/!2 3''4S&9 :%%1V\\^$46 KK23%r+ G"",Sb/!2 3%%1V\\^$46 #)).9OK	Z 	6s;M7N6OOpqr W,>(??)&239;Fs7|nUVKCPbLcKdefL^L\]^'xr23
 %  !J3%rRUVWRXQYZ[  UJ3q6(STTUsx   B8T, 5T, B	S1T,  S16T, 7A	S1BS1T, C8S1CT, 1	T):$T$T, $T))T, ,	U!5!UU!pack_msptimeout_secondsc           
      (  5 t        |      \  }}t        dt        |      dz        }	 t        j	                  d|j
                   d| d| d       |j                  |j                         	 t        ||      j                  d        	 |j                  }	|j                  d	      }
t        |
      }|d
k  xs d|	v xs
 d|	v xs d|
v }|rt        j                  d|j
                   d       t        |j                  ||j                        }|r~t        |      d
kD  rot        ||      }|st        |      }|rSt        j	                  d| d|j
                          |j                  }	 t!        |d      }|j#                  dd      }|r'|j                  dd      j%                  d      r|d   }n;|j#                  dd      }|r&|j                  dd      j%                  d      r|d   }t'        |j                        }|t(        v r}t+        |      }t+        |j                        }|j,                  |j,                  k(  xr |j.                  j1                  d      dk(  }|r"t(        |   j3                  t5        |             }||fS y#t'        |j                        }|t(        v rt(        |   j3                  t5        |             }t        j	                  d$|        |j                  |       	 t        ||      j                  d%        t;        dd&       	 |j                  d	      xs d}
|
j=                         5g d'}t?        5fd(|D              r't        j                  d)| d*|j
                   d+       y#	 	 t        |d-      j                  d.        t;        d/d0       	 |j                  d2       t;        d3d/       |j                  d4       t;        d5d3       |j                  d6      }	t!        |	d      }d"}d7}|jA                  d8d9:      D ]  }|j                  dd      }|r"|j%                  d;      s|j%                  d<      r:|j#                  d=      } d>jC                  |jE                  d>d9?      xs d|j                  d@      xs d| r| j                  dA      xs dndg      }!tG        |!|||      }"|"|kD  s|"}|j%                  d      r|n'tI        dBdCgD      jK                  |j                  |      } ||k  r	 |jM                  tN        jP                  d8      }#|#D ]Z  }$|$jS                  d      xs d}|$jT                  xs d}%|$jS                  d@      xs d}&tG        |%d>z   |&z   |||      }"|"|kD  sW|"}|}\ 	 t        j	                  dF| dG| dHtW        |      d"dI         |r||k  r't        j                  dJ| d*|j
                   d+       y#|j                  |       t;        dd&       t        j9                  dK|jX                          nt[        |       t;        d5d3       t        j	                  dL| dM       t]        |||j
                        }(|(s9t        j                  dN       t]        || j^                  |j
                        }(|(sy#t;        dd&       |j                  d6      }	t!        |	d      }d"}d7}|jA                  d8d9:      D ]  }|j                  dd      }|r"|j%                  d;      s|j%                  d<      r:|j#                  d=      } d>jC                  |jE                  d>d9?      xs d|j                  d@      xs d| r| j                  dA      xs dndg      }!tG        |!|||      }"|"|kD  s|"}|j%                  d      r|n'tI        dBdCgD      jK                  |j                  |      } ||k  rf	 |jM                  tN        jP                  d8      }#|#D ]?  }$|$jS                  d      xs d}|$jT                  xs d}%tG        |%|||      }"|"|kD  s<|"}|}A 	 |rA||k\  r<t        j	                  dO| dH|d"dI         |j                  |       t;        dd&       n't        j                  dP| d*|j
                   d+       y#|j                  d6      }	t        |	|      })|)sg dQ}*dR}+|*D ]  },	 |jM                  tN        j`                  |,      }-|-D ]  }$	 |j                  dS|+ dT|$      }.|.r	 |$jS                  d      xs |$jT                  xs d}/|/j1                         }/|/sTtc        |/      }0|0sbd3|0cxk  rdUk  snn q|0})t        j	                  dV|, dW|)         n |)r n |)s	 |jM                  tN        j`                  dX      }1g }2|1D ]  }$	 |j                  dS+ dT|$      }.|.r	 |$jS                  d      xs |$jT                  xs d}/|/j1                         }/|/sTtc        |/      }0|0sbd3|0cxk  rdUk  snn q|2je                  |0        |2r/t        |2      })t        j	                  dYt        |2       dZ|)        |)sg d[}3|3D ]  },	 |jM                  tN        j`                  |,      }-|-D ]s  }$|$jS                  d      xs |$jT                  xs d}/|/j1                         }/|/s9tc        |/      }0|0sGd3|0cxk  rdUk  sSn V|0})t        j	                  d\|, dW|)         n |)r n |)st        |	      })|)r3t        j	                  d]|) d|j
                          |)|jX                  fS t        j                  d^|j
                          y##  t        j                  d| d       Y xY w#  d}Y 
xY w# t6        $ r#}t        j9                  d!|        Y d"}~	Nd"}~ww xY w#  Y xY w# t6        $ r#}t        j9                  d,|        Y d"}~d"}~ww xY w#  t        j9                  d1       Y xY w#  Y NxY w# t6        $ r#}'t        j9                  dE|'        Y d"}'~'d"}'~'ww xY w# t6        $ r Y *w xY w# t6        $ r Y ew xY w# t6        $ r Y w xY w# t6        $ r Y w xY w# t6        $ r Y @w xY w# t6        $ r Y Hw xY w# t6        $ r8}4t        jg                  d_|j
                   d`tW        |4              Y d"}4~4y#d"}4~4ww xY w)au  
    Scrape the price for ONE pack variant from ONE vendor.

    Args:
        product      : Product ORM object.
        vendor       : Vendor ORM object.
        driver       : Selenium WebDriver instance.
        search_query : Full search string, e.g. "RESVERASOR PLUS Pack 2".
        pack_msp     : The MSP threshold for this specific pack count.
        timeout_seconds: Page load timeout.

    Returns:
        (scraped_price, source_url) or (None, None) if not found.

    Strategy for VENDOR_SEARCH_URL_TEMPLATES path:
      1. Navigate directly to the vendor search results URL.
      2. Check for no-results page — skip immediately if found.
      3. Score all <a> links with smart_link_score (core words + pack number).
      4. Navigate to the best-scoring product page.
      5. Extract price from the product page using layered strategies:
           a. extract_price_near_product (scoped itemprop → max itemprop → DOM walk)
           b. Scoped CSS selectors targeting main product container
           c. Max of all itemprop price elements (handles bundle pages)
           d. Broader fallback CSS selectors
           e. extract_price_from_html (JSON-LD → itemprop → median)
    r>   z[Vendor] ===== z
 | query='z' | pack_msp=z =====c                 *    | j                  d      dk(  S r  rR  r  s    r7   r  z'scrape_vendor_website.<locals>.<lambda>
  s    !**+GHJV r9   z![Vendor] Page load timeout after zs, continuing...rO  rP  rS  zcf-challengezJust a momentFu   [Vendor] ⚠️ Cloudflare on z. Switching to ScrapingBee...)r  u    [Vendor] ✓ ScrapingBee price: r8  r  r   	canonical)relr  rx   r  metazog:url)propertyr  r  rt   z3[Vendor] Could not resolve ScrapingBee source URL: NNNz[Vendor] Direct search URL: c                 *    | j                  d      dk(  S r  rR  r  s    r7   r  z'scrape_vendor_website.<locals>.<lambda>[
  s    a../KLPZZ r9   r  )zno tenemos productos parazno results foundzno se encontraron resultadoszno encontramos resultadosz0 resultadoszsin resultadoszyour search returned no resultsc              3   &   K   | ]  }|v  
 y wr   r  )r   rV  page_text_lowers     r7   r   z(scrape_vendor_website.<locals>.<genexpr>n
  s     HqO+Hr  z[Vendor] No results for 'r%  u    — skippingz"[Vendor] No-results check failed: rZ  c                 Z    t        | j                  t        j                  d            dkD  S )Nza[href*="product"], a[href*="producto"], [class*="product"] a, [class*="result"] a, [data-product-id] a, [class*="km-items"] a, [class*="card"] a, [class*="item"] ar   )rj   rx  r   ry  r  s    r7   r  z'scrape_vendor_website.<locals>.<lambda>w
  s+    c!//"//?#  	 r9   rR  r  z)[Vendor] JS wait timed out, continuing...zwindow.scrollBy(0, 400);r  r  rX  z)return document.documentElement.outerHTMLr   r  Tr  #
javascriptimgr~   )	separatorr   rw   altzurllib.parseurljoin)fromlistz#[Vendor] Live link scoring failed: z[Vendor] Best link score=z (need>=r  r   z'[Vendor] No matching product link for 'z$[Vendor] Navigated to product page: z[Vendor] Search attempt: 'r   z)[Vendor] Search failed. Trying barcode...z+[Vendor] Navigating to product page (score=z[Vendor] No matching link for ')r  r  r  r  r  r  r  r  r  r  r  r  zu'#blockpack, .blockpack, #related-products, .related-products, .cross-sells, .up-sells, #crossselling, .crossselling'zreturn arguments[0].closest(z) !== null;r  u)   [Vendor] ✓ Price from scoped selector 'u   ': €z[itemprop='price']u   [Vendor] ✓ Price (max of u'    noise-filtered itemprop elements): €)z.product-price .pricez.product__price .pricez.product-info .pricez.precioz.product-pricez%meta[property='product:price:amount']zmeta[name='twitter:data1']u+   [Vendor] ✓ Price from fallback selector 'u    [Vendor] ✓✓✓ PRICE FOUND: u!   [Vendor] ✗ No price found from u   [Vendor] ❌ Error scraping r   )4r  r  rj   rc   rd   namerJ   website_urlr   r  re   r]  r\  r>  r  r_  r   r  rS   r  r1  r	   r  r   r   r2  r    rk   r   r8   r   r  r  r   r  r   
__import__r  rx  r   TAG_NAMEget_attributer   rl   r  r  r  r   ry  r%  ri   r   )6r  r  r  r   r  r  r  r  relaxed_min_scorer]  rf  text_lencloudflare_detectedr  r   
source_urlsoup_sbr   og_urlvendor_website_normalizedparsed_resolvedparsed_homepageis_homepageurl_err
search_urlno_results_phrasesnr_errsoup_resultsbest_url
best_scorer  r  r  r   r  
live_linksr  r   rw   live_errsearch_successr   MAIN_SCOPE_SELECTORSNOISE_ANCESTOR_CHECKr  elsin_noiser  r  all_price_els
candidatesFALLBACK_SELECTORSrq   r  s6                                                        @r7   scrape_vendor_websiter0  	  s   D 5\BJAs:23jofkk]*\N-X`Waaghi

6%%&	b&/288V	( ,,K--.NOI9~H3 -,-+--  9,	   NN;FKK=Hefg6""LASASD D	C24F3D9EKK"B5'PVP[P[} ]^!'!3!3Jf"/m"D$+LL[L$I	$vr)B)M)Mf)U)26):J%,\\&8\%LF%&**Y*C*N*Nv*V-3I->
4WX^XjXj4k148SS.6z.BO.6v7I7I.JO / 6 6/:P:P P !F / 4 4 : :3 ?2 E (  +-HIb-c-j-j*4\*B .k ."

 !*,, %HHZHZ$[!$(CC45NOVV . W J KK6zlCDJJz"fo6<<Z
 1L"112RSYWY	"+//"3&" H5GHHNN%>|nERXR]R]Q^^k#lm% I
Kfa(.. C%
%%&@AC%%%&>?C%
 !//0[\K(mDLHJ!**3T*: ouuVR(ts3t|7TffUm #JJDJ9?REE'N(b.1SWWU^)rr* !
 )j+V:%!&J'+v'>t\fgu  BK  AL  ]M  ]U  ]U  V\  Vh  Vh  jn  ]oHo  --S!'!5!5bkk3!GJ( ,!//7=2!ww}" " 0 0 9 ?R 0e1CT:Wb c :-).J'+H, KK3J<xHYGZZ]^abj^klomo^p]qrsz,==!HV[\b\g\g[hhuvw!JJx 1LL?@R@R?STU V$S!KK4\N!DE,V\6;;ON!!JL!0&++!V%%1 //0[\K(mDLHJ!**3T*: ouuVR(ts3t|7TffUm #JJDJ9?REE'N(b.1SWWU^)rr* !
 )j+V:%!&J'+v'>t\fgu  BK  AL  ]M  ]U  ]U  V\  Vh  Vh  jn  ]oHo --
!'!5!5bkk3!GJ( ,!//7=2!ww}" 0tZ U :-).J'+H, J*;;I*UXYabeceYfXghi

8$Aq!!@eTZT_T_S``mno! ++,WX 3;M $ I ! ,  ..rDC! "!'-'<'<">?S>TT_ `bd(H  ( (  (
 !..y9JRWWJ!iik"$%4S%9
%#*Du*D,6M"KK*STWSXX^_l^m(no!%"& % %->  & 4 4R__FZ [
' 6B#)#8#8:;O:PP[\^`$ $$ $ **95FFBC))+C !0!5J!cZ&@5&@"))*56  $'
OMKK"=c*o=NNu  wD  vE  !F  G
 " *  ..rDC! 	" ..y9JRWWJ!iik"$%4S%9
%#*Du*D,6M"KK*UVYUZZ`an`o(pq!	" % %& 3K@MKK:=/PVP[P[}]^ &"4"444NN>v{{mLM}
	bNN>>OO_`a	("'H % f'Z[bZc%deef&&  LA&JKKLKHJH ! SLL#Fxj!QRRSt ! t  ) ! ! ! $ %   : !   3FKK=3q6(KLsF  As .n ?n, 
B's 2Dn6 s A)s >o% s 'A#o- s (p 6:p9 0C%s As A8q q As 3Cs 6C1s (As -Aq0 q0 A*s <*s '%rr %Ar+r7%rs #'r0 r #Ar0 )r0 5Ar0 <s A(s 1s =%s "As ("s n)&s ,n30s 6	o"?os o""s %o*'s -	p6ps ps p63s 9p>;s 	q-
q("s (q--s 0	q=9s <q==s  	r	rrr	rs rs  	r-)r0 ,r--r0 0	r=9s <r==s  	s	s ss 	t.ttc                      e Zd ZdZe	 	 	 ddedededededefd       Z	ed	e
dededee
e   e
e   f   fd
       Ze	 	 	 ddedededededefd       Ze	 	 	 	 ddedee   dee   ded   dee   dee
e   e
e   f   fd       Zy)ScraperServicez1Service for scraping product prices from vendors.ri  
product_idenable_discoveryheadless_moder  rD   c                   K   | j                  t        t              j                  t        j                  |k(               d{   }|j                         j                         }|st        t        j                  d| d      | j                  t        t              j                  t        j                  dk(               d{   }|j                         j                         }|st        t        j                  d      g }	g }
t               }t               }|D ](  }t        |j                         }|j#                  |       * 	 t%        j&                  t(        j*                  ||||      }| d{   \  }	}
|rt,        j/                  d|        	 t               }|	D ]0  }|j1                  dd	      }t3        |      }|j#                  |       2 t,        j/                  d
t5        |       d       d}g }d}||k  r|dz  }t,        j/                  d| d|        t%        j&                  t6        |j8                  |j:                  t=        |j>                        |       d{   }t,        j/                  d| dt5        |       d       |jA                  |       t,        j/                  dt5        |       d       ||k\  rt,        j/                  d| d       n||k  rt,        j/                  dt5        |       d       |D ]  }d|vsd|d<    |	jA                  |       nt,        j/                  d|        t               }g }g }|	D ]  }|j1                  dd	      }|j1                  dd      }|j1                  d       }|j1                  d!      }|j1                  d"d      }||j:                  ||rtI        |d#      nd|f} | |vrU|j#                  |        |jK                  |       t,        j/                  d$| d%|dd&  d'|j:                   d(| d)| d*       |jK                  |       t,        j/                  d+| d%|dd&  d'|j:                   d(| d)| d,        |}	t,        j/                  d-       t,        j/                  d.t5        |	      t5        |      z           t,        j/                  d/t5        |	              t,        j/                  d0t5        |              t,        j/                  d1tM        d2 |	D                      t,        j/                  d3tM        d4 |	D               d5       t               }!|	D ]  }|j1                  d6      }"|d   }|d   }#|d    }|d!   }|d7   }$|j1                  d8      }%|j1                  d9      }&|"rd:}'n|j1                  d      d;k(  rd;}'nd}'t        |#      }d}(|"r	 tO        |"tF              rtQ        |"      n|"}(tY        |j                  |(|j:                  |j8                  tG        |jZ                        ||#||||%|&|$|'=      })| j#                  |)       |$d>k(  s|j1                  d"d      }*||j:                  |tI        |d#      |*f}+|+|!v r-t,        j/                  d?| d%|j:                   d@|#ddA         Zt]        | |#||j8                  |j:                  |||j1                  d"d      B       d{   },|,s|(rt        |#      }-n|}-|j1                  d"d      }.t_        |j                  |(||j:                  |||%|&|-ta        jb                         |#|j8                  tG        |jZ                        |'|.C      }/| j#                  |/       |!j#                  |+       |j#                  |+       t,        jW                  dD| d%|j:                   dE|. dF| dG| d*       ~t,        j/                  dH| d%|j:                   dF|         ta        jb                         |_2        ta        jb                         jg                         }0|0|_4        | jk                          d{    t,        j/                  dIt5        |
              | jm                  |       d{    t,        j/                  dJt5        |	       dKt5        |
       dL       ||j:                  |j8                  t=        |j>                        tn        |	|
t5        |	      |0t5        |
      ||rdMndNdO}1|1S 7 7 U7 7 # tB        $ r.}t,        jE                  dtG        |       d       Y d}~d}~ww xY w# tR        tT        f$ r t,        jW                  d<|"        d}(Y w xY w7 7 87 # tB        $ rf}2t,        jE                  dPtG        |2              | jq                          d{  7   t        t        jr                  dQtG        |2             d}2~2ww xY ww)Ra  
        Scrape a product against all registered vendors.
        
        Args:
            db: Database session
            product_id: Product ID to scrape
            enable_discovery: If True, also discover vendors via Google (disabled by default)
            headless_mode: Run browser in headless mode
            timeout_seconds: Timeout for page load
            
        Returns:
            Dictionary with scraping results
        NProduct with ID 
 not foundr:  detailTNo active vendors foundz/[Scraper] Google discovery ENABLED for product r   rx   z[Discovery] Starting with z vendor URLs to check againstr  r   r   z[Discovery] Attempt r   completed. Found r  z[Discovery] Added  vendor URLs to storagez[Discovery] Completed 	 attemptsz[Discovery] COMPLETE: Found z total unique discovery URLssource_type_hint
discoveredz([Discovery] Exception during discovery: r   z0[Scraper] Google discovery DISABLED for product rz   r   r   r   rj  r>   u   [Dedup] ✅ KEPT: r   rK   (Product: , MSP: ro  rn  u   [Dedup] ❌ REMOVED DUPLICATE: z1) - Same vendor already found from another sourcez
[Deduplication Summary]z  Input URLs: z  Unique results kept: z  Duplicates removed: z  Vendor scraped: c              3   D   K   | ]  }|j                  d       sd  ywr  r   NrJ   r   rs     r7   r   z0ScraperService.scrape_product.<locals>.<genexpr>  s     0Zqquu[GY0Z     z  Discovery found: c              3   D   K   | ]  }|j                  d       rd  ywrD  rE  rF  s     r7   r   z0ScraperService.scrape_product.<locals>.<genexpr>  s     1_AEER]L^!1_rH  r   r  r   r  r  
registeredserp_api_shoppingz%Could not convert vendor_id to UUID: )r3  r  r   r   reference_idrz   r   domain_namer   r   r  r  r   r  r   u1   ⚠️ IN-SESSION DUPLICATE VIOLATION PREVENTED: z at r   )rz   rj  r3  r  rz   r   r   r   r  r  marketplacer  ra   rr  rL  r  rj     🚨 VIOLATION SAVED: r  u    @ ₹u
    (MSP: ₹u/   ⚠️ DATABASE DUPLICATE VIOLATION PREVENTED: uF   [Scraper] ✓ Main violation commit completed. Violations in session: u   📊 [Response] Final Results: z unique URLs, z violations found in this runbrowser_with_serp_api_discoverybrowser_only)r3  r   r   r   pack_counts_scrapedr`   
violationstotal_resultslast_execution_timetotal_violationsdiscovery_enabledr     ❌ Fatal scraping error: Scraping failed: ):rs  r   r   rq  idrt  ru  r   r   HTTP_404_NOT_FOUNDr   	is_activeallHTTP_400_BAD_REQUESTra  r  r  addasyncio	to_threadr2  _run_scrapingrc   rd   rJ   r   rj   r  r   r   r   r   extendrk   r   rl   r)  ri   sumr  r   r   r   re   r   rL  rx  r   r   utcnowlast_scraped_date	isoformatrV  commitrefreshPACK_COUNTSrollbackHTTP_500_INTERNAL_SERVER_ERROR)3ri  r3  r4  r5  r  rp   r  vendor_resultvendorsr`   violation_recordscreated_violations_setr  r  r  scraping_taskvendor_normalized_urlsra   r  max_discovery_attemptsall_discoveredr:   r@  r  excseen_result_keysdeduplicated_resultsremoved_duplicatesrz   r   r   packs_dedup	dedup_keycreated_violationsr  r   vendor_statusr  r  ry   vendor_id_uuidscraping_resultpacks_vkviolation_keyis_duplicaterO  rj  r   execution_time_strresprq   s3                                                      r7   scrape_productzScraperService.scrape_product  s
    , zz&/"7"7

j8P"QRR.."((*"55)*Z@  !jj)=)=f>N>NRV>V)WXX'')--/"770 
 !$ !U 	+F(););<F""6*	+t	#--,,M 0=)<&G&  Mj\Z[1g-0U*") ?$jjr:%A#%F
.22:>?
 KK"<SAW=X<YYv wx ./*%'NG!$::1&:7)1E[D\$]^ ,3+<+<8#OO#00!'++.., &
 &:7)CUVYZdVeUffz${| '--j9&8Z8II`$ab #&<<"KK*@AW@XXa(bc!+ "$::0 KK">s>?R>SSo pq . D-T97CD!34D NN>2 Nzl[\  #u#% !#! Tjjr2$jj	B &

? ;jj'
 %jj!4('*>*>`mU=Z[E\sw  zE  F	$44$((3(//7KK"4[MS"XJkZaZnZnYoovwzv{  |E  FS  ET  TU  !V  W&--f5KK"A+cRUVYWYRZQ[[fgng{g{f|  }D  EH  DI  IR  S`  Ra  aR  !S  T%T( +GKK35KK.W<N8O)O(PQRKK1#g,@AKK05G1H0IJKKK,S0ZG0Z-Z,[\]KK-c1_W1_._-``bcd
 "%! aW"JJ{3	$]3#L1
 & 7Um &x 0#ZZ(:;
"JJ'>?	 )FZZ 237JJ0F)F,Z8 "&.<FyRU<Vi\e #1&zz,!(!5!5#OO!$W%9%9!: +) &"/%/*3&3 &#" ' !K/  &zz'15H%0'2F2FUS`bcMdfn$oM %(::&WXcWddghoh|h|g}  ~B  CM  NQ  OQ  CR  BS  %T  U .G
M7??GL`L`be(36::gq;Q. (
  ,-.A*.M.9$*JJw$:E(1+2::*8,7-4-A-A$'.;1;6?,7/7/@$./6-01E1E-F,2&+)I" FF9-.22=A266}E"NN-CK=PST[ThThSiiopuovv|  ~K  }L  LV  WZ  V[  [\  ,]  ^"KK*YZeYffijqj~j~i  @F  GT  FU  )V  WCaWH )1(9G%!)!2!<!<!>*<G' ))+KK`adevaw`xyz **W%%% KK9#g,~VYZkVlUm  nK  L  M ) ' 4 4"??W[[)'2"/!$W'9$'(9$:%5DT@ZhD KS S Y8 *=2&8 ! gLL#KCPSH:!VaeLffgL '	2 .)Nyk'Z[)-.J(V  &,  	LL5c!fX>?++-"AA*3q6(3 	s  AhdBh	d
Bh,f ;d!<"f Cd' 2d$3B d' 4+d'  d' 8J#f e!9A f Bf 2f3E)f f9f fBf hh!f $d' '	e0#ef ef !)f
f ff f f 	h	#4hg,hh		hro  c                     ddl g }g }t        j                  dd        t        j                  d j                          t        j                  d j                   dt
                t        j                  dt        |              t        j                  d d       d	t        d
t        dt        dt        t        t           t        t        t        t        f      f   f fd}t
        D ]4  }t         j                  |      }t         |      }	|	t        j!                  d| d       Bt        j                  dd d| d| d|	 dt        |       dd        g }
t#               5 }|D ci c]  }|j%                  ||||	|      | }}|D ]O  }||   }	 |j'                         \  }}|D ]  \  }}t        j/                  ||        |?|
j1                  |       Q 	 ddd       |
D cg c]
  }|d   	| }}|
D cg c]  }|d   dk(  s| }}t        |      t        |      z
  }t        j                  d| dt        |       dt        |       dt        |       d| 
       |
D ]H  }dd d!d"j3                  |d   d#      }t        j                  d$| d%|d&   d'd(|d   d)d*|d+    d,	       J |j5                  |
       |
D ]D  }|d   dk(  s|j1                  |d-   |d&   |d.    j                  |d   |d+   |d/   |d0   d1       F t7        d2d3       7 t        j                  dd4        t        j                  d5 j                          t        j                  d6t        |              t        j                  d7t        |              |rKt        j                  d8       |D ]1  }t        j!                  d9|d/    d:|d&   d'd(|d   d)d;|d+           3 t        j                  d4 d       ||fS c c}w # t(        $ r7}t        j+                  d| d|j,                   d|        dg }}Y d}~d}~ww xY w# 1 sw Y   pxY wc c}w c c}w )<u  
        Run the actual scraping in a separate thread.

        Hybrid strategy:
          - Sequential by pack: process PACK_COUNTS [1, 2, 3, 4, 5, 6, 12] one at a time.
          - Parallel by vendor: for each pack, scrape ALL vendors concurrently using
            a ThreadPoolExecutor (no cap — fire them all at once per pack).

        Logging strategy:
          Each vendor thread writes to a private list buffer instead of calling
          the global logger directly. Once the thread finishes, its buffer is
          flushed atomically to the real logger in one block. This prevents the
          interleaved log mess that happens when parallel threads all write to
          the same logger simultaneously.
        r   Nr   r   z*[Scraper] Starting multi-pack scrape for: z[Scraper] Base MSP: z | Pack counts: z[Scraper] Vendors: r   r  r  rD   c                    g dt         dt        ffd}j                  }j                  }j                  }d| d| j
                   d}| j                  s ||| d       dfS d}		 t        	      }	t        | |	||      \  }
}|	r	 t        |	       |	j                          |
 ||| d       dfS |r|n| j                  }t        ||
      }t        ||
      \  }}|dk(  r ||| d|
dd| d| d| d|        nH|dk(  r. ||| d|
dd| dt        |       dt        |       d|        n ||| d|
dd| d|        t        | j                        | j
                  ||
|||||d	fS # t        $ r} ||| d
|        d\  }
}Y d}~d}~ww xY w#  Y xY w# |	r$	 t        |	       |	j                          w #  Y w xY ww xY w)a  
            Scrape a single vendor for a single pack variant.

            All log messages are collected into a local `log_buffer` list as
            (level, message) tuples and returned alongside the result.  The
            caller flushes them to the real logger once this thread completes,
            keeping log output clean and non-interleaved.

            Returns:
                (result_dict_or_None, log_buffer)
            levelmsgc                 ,    j                  | |f       y r   )ri   )r  r  
log_buffers     r7   _logzEScraperService._run_scraping.<locals>.scrape_one_vendor.<locals>._log  s    !!5#,/r9   [Pack][]u    No website URL — skippingNrL  u     ❌ Exception during scraping: r  u    ❓ No price foundr   u    🚨 VIOLATION | scraped=€.4fu
    < msp=€u    | diff=€r  z%) | r/  u     ⚠️  COMPLAIN  | scraped=€u
    > msp=€u    | diff=+€u    ✅ COMPLIANT | scraped=€u
    = msp=€ | )	r  rz   r   r   r   rj  r   r  r  )intrl   INFOWARNINGERRORr  r  rZ  r0  rk   r  r^  r0  r-  absr[  )r  r   r  r  r  r  r  r  r  r  r   r  rv  effective_urlr}  r  r  r  _loggingr5  r  r  s                    @r7   scrape_one_vendorz7ScraperService._run_scraping.<locals>.scrape_one_vendor  s    13J0C 0c 0 mmD&&GnnE*R}A6C%%W%ABCZ''F3]K,AVV\8_-)z .v6 $W%89:Z''*4J&:L:LM7-PM$>x$W!J	+We ""/!4Jxj I)l"YKu]OM
 *,Te ""/!4Jxj I  #J03y>2B%X Te ""/!4JxjM?\ !^%{{+!.#'$.)2
 
 
K  7Use#CC5IJ,6)z7 .v6 sN   -E; F% ;	F"FF, F""F, %F),G0GGGGr  u    ] No MSP configured — skippingu   ──────────────────────────────────────────────────────────────────────z
  PACK z  |  query='u   '  |  msp=€z
  Vendors to scrape: r  z] Future error: r   r   r   z
  Pack z summary | found: r  z | violations: z | no price: u   🚨u   ⚠️ u   ✅)r   r/  r   u   ❓z    z  rz   z<30u    €r  u
     (msp €r   rn  r  r   rj  r  )r  rz   r   r   r   r   rj  r  rR  r  zF======================================================================z  SCRAPE COMPLETE: z  Total results : z  Total violations: z  Violations found:u       🚨 Pack r  u
    < msp €)loggingrc   rd   r   r   rk  rj   rl   r   r  r   r   r   r   r  r  re   r   r  rp   rk   r   r  logri   rJ   rd  r8   )r  ro  r5  r  r`   rp  r  r  r   r  pack_resultsexecutorr  futuresfuturerp   r  rv  r  r  rG  rr  rT  no_pricestatus_iconvr  s   ` ``                      @r7   rc  zScraperService._run_scraping  s!   " 	#bM"@AUAU@VWX*7;;-7G}UV)#g,89vhbM"P	C P	5 P	VY P	^cdlmqdrtxy~  @C  EH  @H  zI  uJ  eJ  _K P	 P	f & G	"J01E1EzRL%gz:Hzl2RSTKKXJ $\,}XJ W((+G~R* (*L#% 4 #* OO$5v|XWabdjj  & 4F$V_F6-3]]_*
 '1 /
s

5#./ )$++F344* %1SqAo4F4RSES%1PQx[K5O!PJPGs5z1HKKJ< (e*Qs7|n 5":/ 0%J( " ,2	X]^bbcdemcnpuv;-r!M*:3)? @O,S1AeH:QH NN<(! X;+-%,,*+K.*+M*:*+L/*1*>*>*+O*<*+E(*+G*,-.@,A	. 	 S!OG	"T 	bM")'*>*>)?@A(W78*3/@+A*BCDKK-.& $QwZLAm4DS3I JO,S1AeH:G
 	vhbM")))K % 6uZL6;;-GWX[W\%]^-12
64 4* TPsf   6Q;PQ%P8"QQ;
Q*Q*Q/Q/Q	Q	#,Q	QQ	QQ'	c                   K   | j                  t        t              j                  t        j                  |k(               d{   }|j                         j                         }|st        t        j                  d| d      | j                  t        t              j                  t        j                  dk(               d{   }|j                         j                         }|st        t        j                  d      g }	g }
t               }t               }|D ](  }t        |j                         }|j#                  |       * 	 t%        j&                  t(        j*                  ||||      }| d{   \  }	}
|	D ]  }|j-                  d      dk(  s|j-                  d	      }|d
   }|d   }|d   }t/        |d         }|j-                  d      }|j-                  d      }|r d}t1        |t2              rt5        |      n|}nd}d}t7        | |||j8                  |j:                  ||       d{   }|r|rt        |      }n|}|j-                  dd      }t=        |j                  |||j:                  |||||t?        j@                         ||j8                  t3        |jB                        ||      }| j#                  |       tD        jG                  d| d|j:                   d| d|dd  d| d        |rtD        jI                  d|        	 d}g }d}||k  r|dz  }tD        jI                  d | d!|        t%        j&                  tJ        |j8                  |j:                  t/        |jL                        |       d{   } tD        jI                  d | d"tO        |        d#       |jQ                  |        tD        jI                  d$tO        |        d%       ||k\  rtD        jI                  d&| d'       n||k  rt               }!g }"g }#|D ]  }$|$j-                  dd(      }%|$j-                  d
d)      }|$j-                  d      }|$j-                  d      }||j:                  ||rtS        |d*      ndf}&|&|!vrU|!j#                  |&       |"jU                  |$       tD        jI                  d+| d|%dd,  d-|j:                   d.| d/| d       |#jU                  |$       tD        jI                  d0| d|%dd,  d-|j:                   d.| d/| d1        |"}tD        jI                  d2tO        |      tO        |#      z    d3tO        |       d4tO        |#       d5       |D ]  }$|$j-                  d      dk(  s|$j-                  d
d)      }'|$j-                  dd(      }(|$j-                  d      })t/        |$j-                  d|jL                              }*t=        |j                  d|'|j:                  |*|$d   |$j-                  d      |$j-                  d      |'|$j-                  dd(      |j8                  t3        |jB                        d6t?        j@                         7      }| j#                  |       | jW                          d{    |j#                  |'|j:                  |*tS        |)d*      f       tD        jG                  d8|' d|j:                   d|)        |
jU                  |'|(|j:                  |)|*|$j-                  d      d9        tD        jI                  d:tO        |       d;       |	jQ                  |       tD        jI                  d<tO        |       d=       | jW                          d{    tD        jI                  d>       t?        j@                         |_.        t?        j@                         j_                         },|,|_0        | jc                          d{    tD        jI                  dAtO        |
              | je                  |       d{    tD        jI                  dBdC        tD        jI                  dD|        tD        jI                  dEtO        |	              tD        jI                  dFtO        |
              tD        jI                  dGtO        |              |
rztD        jI                  dH       |
D ]`  }-tD        jI                  dI|-j-                  d
       dJ|-j-                  dK       d|-j-                  d       dL|-j-                  d       d	       b tD        jI                  dC dB       ||j:                  |j8                  t/        |jL                        |	|
tO        |	      |,tO        |
      |dMdNS 7 	i7 7 87 y7 !7 7 .# tX        $ r.}+tD        j[                  d?t3        |+       d@       Y d}+~+Id}+~+ww xY w7 7 # tX        $ rf}+tD        j[                  dOt3        |+              | jg                          d{  7   t        t        jh                  dPt3        |+             d}+~+ww xY ww)Qa  
        Scrape a product against all registered vendors using SERP API for discovery.
        Same functionality as scrape_product, but uses SERP API Google Shopping Light 
        Engine instead of Tavily for discovering alternative vendors.
        
        Args:
            db: Database session
            product_id: Product ID to scrape
            enable_discovery: If True, also discover vendors via SERP API Shopping (disabled by default)
            headless_mode: Run browser in headless mode
            timeout_seconds: Timeout for page load
            
        Returns:
            Dictionary with scraping results
        Nr7  r8  r9  Tr;  r   r   r  rz   r   r   r   r  r  rJ  r@  )rz   rj  r   rN  rP  r   r  z @ r   z
 (Source: rn  z6[Scraper SERP] SERP API discovery ENABLED for product r  r   z[Discovery SERP] Attempt r  r<  r  z[Discovery SERP] Added r=  z[Discovery SERP] Completed r>  rx   r   r>   u   [Discovery Dedup] ✅ KEPT: rK  rA  rB  ro  u)   [Discovery Dedup] ❌ REMOVED DUPLICATE: z) - Same vendor already foundz[Discovery Dedup] Before: z results, After: z unique results (z duplicates removed)rK  )r3  r  rz   r   r   r   r  r  rO  ra   rr  rL  r  r  u   🔍 SERP VIOLATION SAVED: )rz   r   r   r   r   r  z![Discovery SERP] COMPLETE: Found z* unique discovery URLs after deduplicationz'[Discovery SERP] Results extended with z discovery vendorsuP   [Discovery SERP] ✓ Violations flushed. Will commit after registration updates.z.[Discovery SERP] Error during SERP discovery: r   uN   [Scraper SERP] ✓ SERP violation commit completed. Total violations tracked: r   r   u1   [Scraper SERP] ✓ Scraping complete for product z[Scraper SERP] Total Results: z)[Scraper SERP] Total Violations Created: z5[Scraper SERP] Violations in created_violations_set: z [Scraper SERP] Violations saved:z  - r   r   rm  rQ  )r3  r   r   r   r`   rT  rU  rV  rW  rX  r  rY  rZ  )5rs  r   r   rq  r[  rt  ru  r   r   r\  r   r]  r^  r_  ra  r  r  r`  ra  rb  r2  rc  rJ   r   r  rl   r   rx  r   r   r   r   rf  rL  rc   re   rd   r  r   rj   rd  r)  ri   flushrk   r   rg  rh  rV  ri  rj  rl  rm  ).ri  r3  r4  r5  r  rp   r  rn  ro  r`   rp  rq  r  r  r  rr  r  rz   r   r   r   r  r  ry   r~  r  rO  	packs_valr   rt  ru  r:   r@  seen_discovery_keysdeduplicated_discoveredremoved_discovery_duplicatesr  ra   r{  vendor_name_serpvendor_url_serpscraped_price_serpmsp_serprq   r  r  s.                                                 r7   scrape_product_serpz"ScraperService.scrape_product_serpp  sN    0 zz&/"7"7

j8P"QRR.."((*"55)*Z@  !jj)=)=f>N>NRV>V)WXX'')--/"770 
 !$ !U 	+F(););<F""6*	+a	#--,,M 0=)<&G& " 1`::h';6 &

; 7I"("7K!'!5J$*?$;Mu.C!',>!?J &

+B CI !!-<FyRU<Vi\e!-)- *CJwH\H\^aoz* $L ()*=j*IK*5K$*JJw$:	$-'.zz&4(3)0)=)= #*7-72;(3+3??+< *+2??),W-A-A)B(."+%	" y))?}CPWPdPdOeeklukvvy  {E  FI  GI  {J  zK  KU  V\  U]  ]^  (_  `c1`h  TU_T`abnk-.*%'NG!$::1&?yJ`Ia$bc ,3+<+<=#OO#00!'++.., &
 &?yHZ[^_i[jZkk  %A  B '--j9&=c*o=NNe$fg #&<<"KK*EF\E]]f(gh!+ "$::4 +.%'.0+350 . R"hh|R8&*hh}i&H(,(A"hhuo &1'2F2FhuUS`bcMd{  %A	$,??/33I>3::4@"KK*F{mSVWZ[^\^W_V``kls  mA  mA  lB  BI  JM  IN  NW  Xe  Wf  fg  )h  i8??E"KK*ST_S``cdghkikdlcmmx  zA  zN  zN  yO  OV  WZ  V[  [d  er  ds  sP  )Q  R!R$ &=NKK"<S=PSVWsSt=t<u  vG  HK  LZ  H[  G\  \m  nq  rN  nO  mP  Pd  !e  f !/ (88H-</3xxy/Q,.2hh|R.HO15/1J.',TXXeW[[-I'JH )2+2::*.,<-4-A-A$,.2?.C15:L1M6:hh?V6W,<$(HH\2$>/6-01E1E-F,?/7/@)I  FF9-"$((*,,2668H'J^J^`hjo  qC  EF  kG  8H  I"NN-HIYHZZ]^e^r^r]ssv  xJ  wK  ,L  M .44/?.=070D0D1C'/48HH=O4P6 C(V KK"CCDWCX  YC  !D  ENN>2KK"I#nJ]I^^p qr ((*$$KK"rt )1(9G%!)!2!<!<!>*<G' ))+KKhilm~i  iA  B  C **W%%%KK"VH&KKKJ<XYKK8WGHKKCCHYDZC[\]KKOPSTjPkOlmn !>@* IAKK$quu]';&<Bquu^?T>UUXYZY^Y^_nYoXppwxyx}x}  D  yE  xF  FG  !H  II KK6("& ) ' 4 4"??W[[)"/!$W'9$'(9$:%5@ U S Y8 *=.$\&T -* % ! kLL#QRUVWRXQY!ZeiLjjk  &<  	LL5c!fX>?++-"AA*3q6(3 	s  Ai4f-Bi4	f0
Bi4,h ;f3<!h Bh =f6>h C h &A2g f9B g E9g Dg f< C1g f?g +Ah  g<9h :g?;E1h ,i40i43h 6h 9g <g ?g 	g9#g4.h 4g99h ?h 	i14i,?i ,i,,i11i4Nr   r   r   r  r   c                   K   g }g }	 t         j                  d       t        j                  j	                  d      }|st         j                  d       ||fS |dk(  rt         j                  d       ||fS |  }t         j                  d| d       d}	d	||d
d
d
ddddd
}
	 t        j                  |	|
d      }|j                          |j                         }|s||fS d|v r+t         j                  d|j	                  d              ||fS |j	                  dg       }|j	                  dg       }|j	                  dg       }t         j                  d       t         j                  dt        |       d       t         j                  dt        |       d       t         j                  dt        |       d       t         j                  dt        |j                                       |r7t         j                  d |rt        |d!   j                               nd"        nt         j                  d#       t!               }|r,t         j                  d$t        |       d%       t#        |      D ]  \  }}	 |j	                  d&d'      }|j	                  d(      xs |j	                  d)      }|j	                  d*d'      }|j	                  d+d'      }|j	                  d,      }|svt%        |      }|j	                  d+|j&                  j)                  d-d'            }d}|rU	 t        |      j)                  d.d'      j)                  d/d'      j)                  d0d1      j+                         }t-        |      }|d2z   ||||d3|d4}|rB||d5<   |r0t3        ||      \  }} ||d6<   | |d7<   t5        ||      }!|!|d8<   |!|d9<   nd:|d8<   d:|d9<   n
d:|d8<   d:|d9<   |j7                  |       |r |r|r|r|j	                  d8      d;k(  r	 ||j8                  |t;        |d<      f}"|"|vr|}#t=        |j>                  d||j8                  t-        |      | |#tA        jB                         ||jD                  t        |jF                        d=>      }$|jI                  |$       |jI                  |"       |j7                  ||j8                  t-        |      ||| |#|d=d?	       t         jK                  d@| dA|j8                   dB| dC| dD	       |rdA| ndF}|rdG| nd'}&|r$dH|j	                  d8d:      jO                          dInd'}'t         j                  dJ|d2z    dK|ddL  dM| |& |' dN| dD        n^|r%t         j                  dPt        |       dQ       |}n7|s5t         jK                  dR       |r|r|jQ                          d{    ||fS |rt         j                  dSt        |       dT       t#        |ddU       D ]q  \  }})	 |)j	                  d&d'      }|)j	                  d*d'      }*|)j	                  dVd'      }+|)j	                  dW|d2z         },|*sVt%        |*      }|j&                  j)                  d-d'      }d}d}-|)j	                  dXi       }.|.r^dY|.v rZ|.j	                  dYi       j	                  dZi       }/d(|/v r4t-        |/j	                  d(            }dX}-t         jS                  d[|        |sd)|)v r|)j	                  d)      }d)}-nm|skd(|)v rg|)j	                  d(d'      }	 t-        t        |      j)                  d.d'      j)                  d/d'      j)                  d0d1      j+                               }d\}-|so|+rm	 g d]}0|0D ]c  }1tU        jV                  |1|+      }2|2s|2jY                  d2      j)                  d0d1      }	 t-        |      }d^}-t         jS                  d_|         n |sf|*rdt         j                  da|*ddb         	 t[        |*|        d{   }3|3r|3}dc}-t         j                  dd|        nt         jS                  de       |,||*||+dgdh}|rG||d5<   |-|di<   |r0t3        ||      \  }} ||d6<   | |d7<   t5        ||      }!|!|d8<   |!|d9<   nd:|d8<   d:|d9<   n
d:|d8<   d:|d9<   |j7                  |       |r/|r,|r)|r&|j	                  d8      d;k(  r	 ||j8                  |t;        |d<      f}"|"|vr|}#t3        ||      \  }} t=        |j>                  d||j8                  t-        |      ||| |#tA        jB                         |*|jD                  t        |jF                        d=>      }$|jI                  |$       |jI                  |"       |j7                  ||j8                  t-        |      ||| |#|*d=d?	       t         jK                  d@| dA|j8                   dB| dC| dD	       |r
dj|dkdN|- dDndl}|r$dH|j	                  d8d:      jO                          dInd'}'t         j                  dm|d2z    dK|ddL  dM| |' dN| dD
       |s|+rt         jS                  dn|+ddo  dM       t |r<|r:|jQ                          d{    t         j                  dqt        |       dr       t         j                  dst        |       dtt        |       du       ||fS # t        j                  j                  $ r/}t         j                  dt        |              ||fcY d}~S d}~wt        $ r/}t         j                  dt        |              ||fcY d}~S d}~ww xY w# t        t.        t0        f$ r Y w xY w# tL        $ r,}%t         jK                  dEt        |%              Y d}%~%d}%~%ww xY w# tL        $ r/}(t         jK                  dO| dKt        |(              Y d}(~(	d}(~(ww xY w7 4# t        t.        t0        f$ r d}Y w xY w# t        t0        f$ r Y w xY w# t        t.        t0        f$ r,}(t         jS                  d`t        |(              Y d}(~(~d}(~(ww xY w7 W# tL        $ r,}4t         jS                  dft        |4              Y d}4~4Qd}4~4ww xY w# tL        $ r,}%t         jK                  dEt        |%              Y d}%~%d}%~%ww xY w# tL        $ r/}(t         jK                  dp| dKt        |(              Y d}(~(d}(~(ww xY w7 # tL        $ rK}(t         j                  dvt        |(       dwx       |r|j]                          d{  7   Y d}(~(||fS d}(~(ww xY ww)yaX  
        Search using SERP API Google Search Engine (regular search, not shopping).
        Optionally stores violations in database following same schema as SERP Shopping Light API.
        
        Args:
            product_name: Product name for search
            barcode: Product barcode (optional)
            msp: Minimum Selling Price (optional, used for violation detection)
            product: Product object (required for database storage)
            db: AsyncSession (required for database storage)
        
        Returns:
            Tuple of (results_list, violations_list)
        zC[Google Search SERP] Using SERP API Google Search Engine for searchr   uH   [Google Search SERP] ❌ SERP_API_KEY not found in environment variablesr   uA   [Google Search SERP] ❌ SERP_API_KEY is set to placeholder valuez$[Google Search SERP] Search query: 'r   r   r  Spainz
google.comenusdesktop)
r   r   rY   r   location_requestedlocation_usedgoogle_domainhlr   devicer   r   z*[Google Search SERP] HTTP request failed: Nz4[Google Search SERP] Failed to parse JSON response: r   z [Google Search SERP] API Error: r   r   inline_shoppingu5   [Google Search SERP] 🔍 SERP API Response Analysis:z*[Google Search SERP]   - organic_results: rb   z+[Google Search SERP]   - shopping_results: z*[Google Search SERP]   - inline_shopping: z-[Google Search SERP]   - All available keys: uC   [Google Search SERP] ✅ GOOD! Found inline_shopping. Sample keys: r   r   uU   [Google Search SERP] ⚠️ No inline_shopping in response. Will use organic_results.z[Google Search SERP] Found z3 INLINE SHOPPING products with prices! Using those.rw   rx   r   r   r   ry   ratingr  r   r   r   r   r   google_inline_shopping)positionrz   r   rw   	price_rawry   r  r   r  r  r   r   r   r   r>   serp_api_google_search)r3  r  rz   r   r   r   r  r  rO  r  ra   rr  rL  r  )	rz   r   r   r   r  r  rO  ra   r  rP  r   u    @ €u
    (MSP: €rn  z/[Google Search SERP] Error creating violation: z (no price)u    ⭐z [r  u)   [Google Search SERP] ✓ Inline Shopping r   rW  r  r  z3[Google Search SERP] Error parsing inline shopping z[Google Search SERP] Using z shopping results insteadu;   [Google Search SERP] ⚠️ No results of any type returnedz [Google Search SERP] Processing z organic resultsrN  r}   r  rich_snippetbottomdetected_extensionsu<   [Google Search SERP]   💰 Found price in rich_snippet: €direct_price)r6  r7  u   ([\d,]+,[\d]{2})\s*€u   €\s*([\d,]+,[\d]{2})snippet_regexu=   [Google Search SERP]   💰 Extracted price from snippet: €z0[Google Search SERP]   Price extraction failed: uU   [Google Search SERP] 🔗 No price found in API response. Attempting website scrape: rK  website_scrapeu9   [Google Search SERP]   ✅ Website scrape successful: €u>   [Google Search SERP]   ⚠️ Website scrape returned no pricez-[Google Search SERP]   Website scrape error: google_organic)r  rz   r   rw   r}   ry   price_sourceu    - €r   z (no price found)u!   [Google Search SERP] ✓ Organic z [Google Search SERP]   Snippet: r'  z2[Google Search SERP] Error parsing organic result u#   [Google Search SERP] ✓ Committed z violations to databaseu;   [Google Search SERP] ✓ Google Search completed. Returned z
 results, z violationsz$[Google Search SERP] Search failed: Tr   )/rc   rd   rH   rI   rJ   r   rf   rh   r^   r   r   rl   r   rj   r  keysra  r   r	   r  r   r   r   r   r   r-  r0  ri   r   r)  r   r[  r   rf  r   rL  r`  re   rk   r   ri  r   r"  rA  rC  rI  rl  )5r   r   r   r  ri  r`   rT  rY   r   r   r   rn   ro   r   r   r   r   r  r|  r   r  rw   r   r   ry   r  
parsed_urlrz   price_floatr   r   r  r  
compliancer  rO  r   violation_err
rating_str
status_strrq   rp   ra   r}   r  r  r  r  patternsrG  price_matchscraped_website_pricescrape_errors5                                                        r7   search_google_serpz!ScraperService.search_google_serp  s,    , 
p	$KK]^jjnn^4G gh
**22`a
** +^LKK>|nANO 9H #!"#&-!(!-#F	+#<<L))+}} 
** $?@Q?RST
** #hh'8"=O#xx(:B?"hh'8"=OKKOQKKDSEYDZZbcdKKEcJZF[E\\defKKDSEYDZZbcdKKGTYY[HYGZ[\a  FUbfgvwxgyg~g~  hA  cB  [`  ba  b  csu "% 9#o:N9O  PC  D  E!*?!; g!ICf! $" 5 $ 1 PTXX>O5P#xx3!%(B!7!%(!3#$ &.d^
&*hhx9J9J9R9RSY[]9^&_ '+ %,/J,>,>ub,I,Q,QRUWY,Z,b,bcfhk,l,r,r,t	.3I.>
 ),a+6*.%*).&>&,' ';FK8  #8RSVXc8d 5
IBL,> ?GP,C D .Ik-Z
8BH 5CM,? @8AH 5CL,? @4=K1?HK(;<{3 'kckooV^F_cnFn#w1<g>R>RTWY^_jlmYn0o#08J#J2=K093:::264?5<5I5I,1#J6A9C>G4?7?7H,07>589M9M5N4L1&I  %'FF9$5$6$:$:=$I$.$5$57B8?8L8L/4Sz9D<FAJ7B/37O
7& 
%' %+NN5KK=X[\c\p\p[qqw  yD  xE  EO  PS  OT  TU  4V  %W 6;c%M	8>tF8_B
]br+//(I*N*T*T*V)WWX%Yhj
&OPSVWPWyXZ[`adbd[eZffijsitu  uA  BL  AM  MO  P[  O\  \]  %^  _Gg!R "9#>N:O9PPijk"2$!\^,))+%%
** >s??S>TTdef#,_Sb-A#B a!KC`! &

7B 7$jj4"(**Y";#)::j#'#B"$ &.c]
&0&7&7&?&?&K !%'+ (.zz."'E'H,D2>2B2B8R2P2T2TUjln2o/&*==(-.A.E.Eg.N(O/= &/klqkr-s t  %):f)D$*JJ/@$AE+<L!&7f+<(.

7B(?I-(-c)n.D.DUB.O.W.WX[]_.`.h.hilnq.r.x.x.z({/=
  %%," 08 
!5G24))GW2MK'24?4E4Ea4H4P4PQTVY4Z	)549)4DE;JL,2LL;xy~x  :A  -B,1
!5   %"KK*  AD  EH  FH  AI  @J  )K  L
%>WX[]i>j8j 5#8,AE3CL$*KK2klqkr0s$t$*LL3q$s )1+6*-%*'.&6' !;@K8:FK7  #8RSVX]8^ 5
IBL,> ?GP,C D .Ie-T
8BH 5CM,? @8AH 5CL,? @4=K1?HK(;<{3 'ePX@Y]h@h$w1<g>R>RTWY^_dfgYh0i#08J#J2=K<VWZ\a<b$9J	093:::264?5<5I5I,1#J6;9C>G4?7?7H,/7>589M9M5N4L1&I  %'FF9$5$6$:$:=$I$.$5$57B8?8L8L/4Sz9><FAJ7B/27O
7& 
%' %+NN5KK=X[\c\p\p[qqwx}w~  I  JM  IN  NO  4P  %Q NSfU3Kr,q$IXk	]br+//(I*N*T*T*V)WWX%Yhj
&GayPRSXY\Z\S]R^^abkalmwlxxz  |G  {H  HI  %J  K$"LL+KGTXUXM?Z])^_{a!H jiik!!A#j/ARRijkKKUVYZaVbUccmnqr|n}m~  J  K  L 
""[
 &&77 +I#i.IYZ[
** +STWXbTcSdef
**+r %/	#J % $%P $- w &1`aderas`t/u v vw % !)\]`\aacdghidjck'lm ! &V %/	#J -(,-. 1;I/F )5,4)5$.	#J % &/_`cde`f_g-h i $% 9k $- % &/\]`am]n\o-p q $%T $- w &1`aderas`t/u v vw % !)[\_[``bcfghcibj'kl ! "
  	$LL?AxHSWLXkkm##
""	$s  u?At( u?t( 4u?5+t( !8k+ t( u?.t( u?E/t( >A-o+t( ,;o(Am0<Bo
DnA&o1At( 
o>t( u?:t( As*t( Cs*,Ap s*!p4'!p4	%p.p41s*q: q7!8q:Bs*(Dr28B s*8t( t%At( 'u?+m-$l2,m--t( 1u?2m->$m("m-#t( 'u?(m--t( 0nono	o !n;5o;o  o	o;$o60t( 6o;;t( ps*ps*p1-p40p11p44q4!q/)s*/q44s*7q::	r/!r*$s**r//s*2	s';!s"s*"s''s**	t"3$tt( t""t( (	u<18u7)u,*u7/u?7u<<u?)FTr]   )NNNN)r@  
__module____qualname____doc__staticmethodr   r  boolr   r  r   r   rc  r  rl   r   r   r  r  r9   r7   r2  r2  |  s   ; "'"!fff f 	f
 f 
f fP	 E* E*T E*TW E*\abfgkblnrswnxbx\y E* E*N  "'"!UUU U 	U
 U 
U Un  "&#'+%)J#J##J# e_J# )$	J#
 \"J# 
tDz4:%	&J# J#r9   r2  )r  g      N@r   )rx   )Nr   )rV  r   )Nr   )rX  r  )TF)r  )r]   ){r4   r"  r^   r2   r  ra  rH   r  r   r   typingr   r   r   r   urllib.parser	   r
   rg  r   r   concurrent.futuresr   rf   bs4r   seleniumr   !selenium.webdriver.chrome.servicer   !selenium.webdriver.chrome.optionsr   selenium.webdriver.supportr   r  webdriver_manager.chromer   selenium.webdriver.support.uir   selenium.webdriver.common.byr   selenium.webdriver.common.keysr   'selenium.webdriver.common.action_chainsr   sqlalchemy.ext.asyncior   rp  r   fastapir   r   r   r   app.models.productr   app.models.vendorr   app.models.violationr   app.models.scraping_resultr   r    playwright.sync_apir!   r  ImportError	getLoggerr@  rc   r9  rK   HTTP_HEADERSr   r8   r  rC   rl   rO   rT   rr   ru   r  r   r   r  	_executorr  r  r   r%  r-  r0  rI  r;  rx  r  r  r  r  r  r1  rk  r  r  r  r  r   r>  rH  rP  rS  rm  rZ  r  r  r_  r  ra  r  r  r0  r2  r  r9   r7   <module>r     s    	     	  ( . . ,  1    5 5 @ 8 7 + / @ /  )   & $ * 5 #!3 
		8	$  M*0 S%  !$ U  	s 	 	PU 	 "(3- HSM &S &T#Y &R$# $$s) $T d <|# | |% |4PT: |~@H 1-		H-S -S -#c #c #"s x 5 U5%<=P *HUO  <\ \C \RW \~O3 Oc O8TY? Op "&II	I E?I 	I
 I 
I #I I 
IX3  T $N\c \# \X[ \emnqer \|' ' 'T}S } } }N ![AD&U )H#Y+s $[  %
' 
' 
' 
'  : #  #tCy(3-7O1P  #F3 3 3c 3YabeYf 3kn 3lNS N NUX Ndlmpdq Nd5 5 E E 
	U U UR[RbRb Ur	H:P# PC P PdH H% HZU U3 Ut Upi# iS iu ibe ijnosjt iXHs H# HE Hgj Hostxoy Hb O 	O
 O O 8E?HSM)*OdW# W#gZ  ! !s   K; ;LL