#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
"""Search old.reddit.com for subreddits matching a term and pretty-print the results.

Scrapes the old-Reddit subreddit search pages (following "next" pagination),
prints each subreddit found — optionally ANSI-colored via the `colored`
package, with the search terms highlighted — and returns the scraped records.
A subreddit is considered "already known" when a directory `r/<name>` exists
relative to the working directory.
"""

import sys
import urllib.parse
from pathlib import Path
from typing import AnyStr, Callable, Dict, List, Optional, Tuple

import colored as clrlib
import html2text as html2textlib
from bs4 import BeautifulSoup

from .system import simpleDownloader


def html2text(html, withMd=True, limit=65535):
    """Convert an HTML fragment to plain text.

    :param html: HTML string to convert.
    :param withMd: when False, suppress Markdown markup (emphasis, images,
        links, tables) so the output is bare text.
    :param limit: body width passed to HTML2Text (wrap column).
    :return: the converted text.
    """
    h = html2textlib.HTML2Text(baseurl="", bodywidth=limit)
    if not withMd:
        # Strip everything that would render as Markdown syntax.
        h.ignore_emphasis = True
        h.ignore_images = True
        h.ignore_links = True
        h.ignore_tables = True
    return h.handle(html)


def extract_subreddits_from_page(pagebs: BeautifulSoup) -> Tuple[Optional[str], List[Dict[str, str]]]:
    """Extract subreddit entries from one old-Reddit search-results page.

    :param pagebs: parsed page (BeautifulSoup).
    :return: ``(next_page_url, entries)`` where ``next_page_url`` is the href
        of the "next" pagination button or None when on the last page, and
        each entry is a dict with keys ``isNsfw``, ``link``, ``subreddit``,
        ``title`` and ``description``.
    """
    # Walk down to the "next" button's href; any miss leaves None-ish.
    nextbutton = pagebs.find(class_='nav-buttons')
    if nextbutton:
        nextbutton = nextbutton.find(class_='next-button')
    if nextbutton:
        nextbutton = nextbutton.find('a')
    if nextbutton:
        nextbutton = nextbutton['href']
    srs = list()
    srtbs = pagebs.find(id='siteTable')
    for srbs in srtbs.find_all(class_='subreddit'):
        isNsfw = srbs.find('span', alt='NSFW') is not None
        titlebs = srbs.find('a', class_='title')
        descriptionbs = srbs.find(class_='description')
        link = titlebs['href']
        if '/r/' not in link:
            # Skip promoted/non-subreddit results.
            continue
        # Title text looks like "r/<name>: <title>".
        name = titlebs.text
        subreddit = name.split(':', 1)[0].split('/', 1)[-1].lower()
        title = name.split(':', 1)[1][1:]
        description = html2text(str(descriptionbs), False, 60).strip()
        srs.append(dict(
            isNsfw=isNsfw,
            link=link,
            subreddit=subreddit,
            title=title,
            description=description,
        ))
    return (nextbutton, srs)


def pad_text_block(s: str, wth: str) -> str:
    """Prefix every line of *s* with the string *wth*."""
    return '\n'.join(list(map(
        lambda l: f'{wth}{l}',
        s.splitlines()
    )))


def findmany(text: str, terms: List[str]) -> Tuple[int, Optional[str]]:
    """Find the earliest occurrence of any of *terms* in *text*.

    :return: ``(position, term)`` of the leftmost match, or ``(-1, None)``
        when *terms* is empty or none of them occurs.  When two terms match
        at the same position, the one listed later in *terms* wins (dict
        overwrite — preserved from the original behavior).
    """
    if len(terms) <= 0:
        return -1, None
    incidences = dict()
    for term in terms:
        pos = text.find(term)
        if pos >= 0:
            incidences[pos] = term
    if len(incidences) <= 0:
        return -1, None
    m = min(incidences.keys())
    return m, incidences[m]


def highlight_search_term(terms: List[str],
                          text: str,
                          styler: Callable[[str], str],
                          case_insensitive: bool = True) -> str:
    """Wrap every occurrence of any of *terms* inside *text* with *styler*.

    :param terms: substrings to highlight.
    :param text: text to scan; returned with matches replaced by
        ``styler(match)`` (original casing preserved).
    :param styler: callable applied to each matched slice.
    :param case_insensitive: when True, matching is done on lowercased
        copies while the emitted text keeps the original case.
    """
    texto = text
    textl = text.lower() if case_insensitive else text
    termsl = list(map(str.lower, terms)) if case_insensitive else terms
    buffo = ''
    while True:
        # Scan the (possibly lowercased) shadow copy, emit from the original.
        matchpos, matchtrm = findmany(textl, termsl)
        if matchpos < 0:
            buffo += texto
            break
        buffo += texto[:matchpos]
        buffo += styler(texto[matchpos:matchpos + len(matchtrm)])
        texto = texto[matchpos + len(matchtrm):]
        textl = textl[matchpos + len(matchtrm):]
    return buffo


def do_search(term: str,
              include_nsfw: bool = True,
              colored: Optional[bool] = True) -> List[Dict[str, str]]:
    """Search old.reddit.com for subreddits and print the results.

    :param term: search query; its whitespace-split words are highlighted.
    :param include_nsfw: include over-18 subreddits in the search.
    :param colored: True → ANSI-colored output; False → plain output;
        None → no printing at all (results are only returned).
    :return: list of all scraped subreddit dicts (including duplicates
        across pages; printing is deduplicated).
    """
    simpleDownloader.cleanCookies()
    simpleDownloader.setCookies({'over18': 1})
    next_page_url = (
        'https://old.reddit.com/subreddits/search?'
        + ('include_over_18=on&' if include_nsfw else '')
        + 'q=' + urllib.parse.quote_plus(term)
    )
    srs = list()
    srlst = list()  # subreddit names already printed (dedup across pages)
    nothing_new = True
    while next_page_url:
        pagebts = simpleDownloader.getUrlBytes(next_page_url)
        pagebs = BeautifulSoup(pagebts, 'html5lib')
        next_page_url, nsrs = extract_subreddits_from_page(pagebs)
        srs += nsrs
        # NOTE(review): iterates the cumulative list each page; srlst makes
        # that safe (already-printed entries are skipped) so results stream
        # out as pages load.
        for sr in srs:
            if (nm := sr['subreddit']) in srlst:
                continue
            else:
                srlst.append(nm)
            # "Already downloaded" marker: a local r/<name> directory exists.
            iw = Path('r', sr['subreddit']).exists()
            nothing_new = nothing_new and iw
            if colored is not None:
                ds = '@' if iw else '#'
                srn = sr['subreddit']
                isw = sr['isNsfw']
                sfw = 'nsfw' if isw else 'sfw'
                sfw = f'[{sfw}]'
                srt = sr['title']
                srd = pad_text_block(sr['description'], ' ' * 8)
                srl = sr['link'].replace('//old.', '//www.')
                if colored:
                    ds = clrlib.stylize(
                        ds,
                        [clrlib.fg('light_green' if iw else 'light_red')]
                    )
                    srn = clrlib.stylize(srn, [clrlib.fg('light_cyan')])
                    sfw = clrlib.stylize(
                        sfw,
                        [clrlib.fg('light_green' if not isw else 'light_red')]
                    )
                    srl = clrlib.stylize(srl, [clrlib.fg('light_blue')])
                    srt = clrlib.stylize(srt, [clrlib.fg('cyan')])
                    srd = '\n'.join(list(map(
                        lambda srdl: clrlib.stylize(
                            srdl,
                            [clrlib.fg('dark_gray' if iw else 'light_gray')]
                        ),
                        srd.splitlines()
                    )))
                    termssplit = term.split()

                    def highligher(t):
                        # Red background; reset only the background ("49")
                        # so the foreground styling survives the highlight.
                        clrlibobj = clrlib.colored('')
                        bgreset = clrlibobj.ESC + '49' + clrlibobj.END
                        return clrlib.bg('red') + t + bgreset

                    srn = highlight_search_term(termssplit, srn, highligher)
                    srt = highlight_search_term(termssplit, srt, highligher)
                    srd = highlight_search_term(termssplit, srd, highligher)
                print(f"{ds} {srn} {sfw} {srl}")
                print(f" {srt}")
                print(srd)
                print()
    if nothing_new:
        if colored is not None:
            msg = "> Nothing new... move on!"
            if colored:
                msg = clrlib.stylize(msg, [clrlib.fg('yellow')])
            print(msg)
    simpleDownloader.cleanCookies()
    return srs


def main():
    """CLI entry point: join argv into one search term and run the search."""
    search_term = (
        ' '.join(list(map(str.strip, map(str, sys.argv[1:]))))
    ).strip()
    if len(search_term) <= 0:
        # NOTE(review): the placeholder was missing in the mangled source
        # (likely stripped as an HTML-like tag); restored.
        print(f'Usage:\n    {sys.argv[0]} <search term>')
    else:
        do_search(search_term)


if __name__ == '__main__':
    main()