204 lines
6.6 KiB
Python
204 lines
6.6 KiB
Python
|
#!/usr/bin/env python3
|
||
|
# -*- encoding: utf-8 -*-
|
||
|
|
||
|
import sys
|
||
|
import urllib.parse
|
||
|
from pathlib import Path
|
||
|
from typing import AnyStr, Callable, Dict, List, Optional, Tuple
|
||
|
|
||
|
import colored as clrlib
|
||
|
import html2text as html2textlib
|
||
|
from bs4 import BeautifulSoup
|
||
|
|
||
|
from .system import simpleDownloader
|
||
|
|
||
|
|
||
|
def html2text(html, withMd=True, limit=65535):
    """Render an HTML fragment as text via the html2text library.

    :param html: HTML source string to convert.
    :param withMd: when False, strip Markdown artifacts (emphasis,
        images, links, tables) so the output is plain text.
    :param limit: body width passed to HTML2Text (wrap column).
    :return: the converted text.
    """
    converter = html2textlib.HTML2Text(baseurl="", bodywidth=limit)
    if not withMd:
        for flag in ('ignore_emphasis', 'ignore_images',
                     'ignore_links', 'ignore_tables'):
            setattr(converter, flag, True)
    return converter.handle(html)
|
||
|
|
||
|
|
||
|
def extract_subreddits_from_page(pagebs: BeautifulSoup) -> Tuple[Optional[str], List[Dict[str, str]]]:
    """Scrape one page of reddit subreddit search results.

    :param pagebs: parsed search-results page.
    :return: ``(next_page_url, subreddits)`` where ``next_page_url`` is the
        href of the "next" pagination button or None when there is no
        further page, and ``subreddits`` is a list of dicts with keys
        ``isNsfw``, ``link``, ``subreddit``, ``title``, ``description``.
    """
    # Resolve the pagination link step by step; if any step is missing we
    # fall through with next_url = None.  (Previously a partially resolved
    # chain could return a truthy bs4 Tag, which the caller would then try
    # to fetch as a URL.)
    next_url = None
    nav = pagebs.find(class_='nav-buttons')
    if nav:
        nxt = nav.find(class_='next-button')
        if nxt:
            anchor = nxt.find('a')
            if anchor:
                next_url = anchor['href']
    srs = []
    srtbs = pagebs.find(id='siteTable')
    for srbs in srtbs.find_all(class_='subreddit'):
        isNsfw = srbs.find('span', alt='NSFW') is not None
        titlebs = srbs.find('a', class_='title')
        descriptionbs = srbs.find(class_='description')
        link = titlebs['href']
        if '/r/' not in link:
            continue
        name = titlebs.text
        # Entries look like "r/<subreddit>: <title>".
        head, sep, tail = name.partition(':')
        subreddit = head.split('/', 1)[-1].lower()
        # Guard against a missing ':' (indexing split()[1] would raise).
        title = tail[1:] if sep else ''
        description = html2text(str(descriptionbs), False, 60).strip()
        srs.append(dict(
            isNsfw=isNsfw,
            link=link,
            subreddit=subreddit,
            title=title,
            description=description,
        ))
    return (next_url, srs)
|
||
|
|
||
|
|
||
|
def pad_text_block(s: str, wth: str) -> str:
    """Prefix every line of *s* with the string *wth*.

    Lines are split with ``str.splitlines`` and rejoined with ``'\\n'``,
    so any trailing newline in *s* is not preserved.
    """
    return '\n'.join(f'{wth}{line}' for line in s.splitlines())
|
||
|
|
||
|
|
||
|
def findmany(text: str, terms: List[str]) -> Tuple[int, str]:
    """Find the earliest occurrence in *text* of any string in *terms*.

    :return: ``(position, term)`` of the leftmost match, or ``(-1, None)``
        when *terms* is empty or none of them occur.  If two terms match
        at the same position, the one listed later in *terms* wins.
    """
    if not terms:
        return -1, None
    hits = {}
    for needle in terms:
        idx = text.find(needle)
        if idx >= 0:
            hits[idx] = needle
    if not hits:
        return -1, None
    first = min(hits)
    return first, hits[first]
|
||
|
|
||
|
|
||
|
def highlight_search_term(terms: List[str], text: str, styler: Callable[[str], str], case_insensitive: bool = True) -> str:
    """Wrap every occurrence of any term in *text* with *styler*.

    :param terms: search words to highlight; matching is left-to-right,
        earliest occurrence first (via findmany).
    :param text: the text to scan; the original casing is preserved in
        the output even when matching case-insensitively.
    :param styler: callable applied to each matched span (e.g. adds ANSI
        color codes around it).
    :param case_insensitive: when True, match on lowercased copies.
    :return: *text* with every match replaced by ``styler(match)``.
    """
    # An empty term matches at position 0 with zero length and would
    # never advance the scan — drop empties to avoid an infinite loop.
    terms = [t for t in terms if t]
    remaining = text
    haystack = remaining.lower() if case_insensitive else remaining
    needles = [t.lower() for t in terms] if case_insensitive else terms
    pieces = []
    while True:
        matchpos, matchtrm = findmany(haystack, needles)
        if matchpos < 0:
            pieces.append(remaining)
            break
        end = matchpos + len(matchtrm)
        pieces.append(remaining[:matchpos])
        # Style the span from the ORIGINAL text, not the lowered copy.
        pieces.append(styler(remaining[matchpos:end]))
        remaining = remaining[end:]
        haystack = haystack[end:]
    return ''.join(pieces)
|
||
|
|
||
|
|
||
|
def do_search(term: str, include_nsfw: bool = True, colored: Optional[bool] = True) -> List[Dict[str, str]]:
    """Search old.reddit.com for subreddits matching *term*.

    Follows pagination until exhausted, printing each not-yet-seen result
    as it goes, and returns the raw list of subreddit dicts accumulated
    from every page.

    :param term: free-text query; its whitespace-separated words are
        highlighted in the colored output.
    :param include_nsfw: when True, ask reddit to include over-18 results.
    :param colored: True -> ANSI-styled output, False -> plain output,
        None -> print nothing at all.
    """
    simpleDownloader.cleanCookies()
    # The over18 cookie is required for reddit to serve NSFW listings.
    simpleDownloader.setCookies({'over18': 1})
    next_page_url = (
        'https://old.reddit.com/subreddits/search?' +
        ('include_over_18=on&' if include_nsfw else '') +
        'q=' + urllib.parse.quote_plus(term)
    )
    srs = list()    # all scraped subreddit dicts (the return value)
    srlst = list()  # subreddit names already handled, for de-duplication
    nothing_new = True  # flips False once any result lacks a local r/<name> dir
    while next_page_url:
        pagebts = simpleDownloader.getUrlBytes(next_page_url)
        pagebs = BeautifulSoup(pagebts, 'html5lib')
        next_page_url, nsrs = extract_subreddits_from_page(pagebs)
        srs += nsrs
        # NOTE(review): re-iterates the WHOLE accumulated list on every
        # page; the srlst check keeps output correct, but this rescans
        # earlier pages' entries each time.
        for sr in srs:
            if (nm := sr['subreddit']) in srlst:
                continue
            else:
                srlst.append(nm)
            # iw: a local directory r/<name> already exists — presumably
            # from a previous download run; TODO confirm against the
            # downloader that populates r/.
            iw = Path('r', sr['subreddit']).exists()
            nothing_new = nothing_new and iw
            if colored is not None:
                ds = '@' if iw else '#'  # marker: '@' already local, '#' new
                srn = sr['subreddit']
                isw = sr['isNsfw']
                sfw = 'nsfw' if isw else 'sfw'
                sfw = f'[{sfw}]'
                srt = sr['title']
                srd = pad_text_block(sr['description'], ' '*8)
                # Show the canonical www link instead of old.reddit.com.
                srl = sr['link'].replace('//old.', '//www.')
                if colored:
                    ds = clrlib.stylize(
                        ds,
                        [clrlib.fg('light_green' if iw else 'light_red')]
                    )
                    srn = clrlib.stylize(
                        srn,
                        [clrlib.fg('light_cyan')]
                    )
                    sfw = clrlib.stylize(
                        sfw,
                        [clrlib.fg('light_green' if not isw else 'light_red')]
                    )
                    srl = clrlib.stylize(
                        srl,
                        [clrlib.fg('light_blue')]
                    )
                    srt = clrlib.stylize(
                        srt,
                        [clrlib.fg('cyan')]
                    )
                    # Dim the description when the subreddit is already local.
                    srd = '\n'.join(list(map(
                        lambda srdl: clrlib.stylize(
                            srdl,
                            [clrlib.fg('dark_gray' if iw else 'light_gray')]
                        ),
                        srd.splitlines()
                    )))
                    termssplit = term.split()

                    def highligher(t):
                        # Red background on each matched search word; reset
                        # only the background (ESC 49) so the foreground
                        # styling applied above survives the highlight.
                        clrlibobj = clrlib.colored('')
                        bgreset = clrlibobj.ESC+'49'+clrlibobj.END
                        return clrlib.bg('red') + t + bgreset
                    srn = highlight_search_term(termssplit, srn, highligher)
                    srt = highlight_search_term(termssplit, srt, highligher)
                    srd = highlight_search_term(termssplit, srd, highligher)
                print(f"{ds} {srn} {sfw} {srl}")
                print(f" {srt}")
                print(srd)
                print()
    if nothing_new:
        if colored is not None:
            msg = "> Nothing new... move on!"
            if colored:
                msg = clrlib.stylize(msg, [clrlib.fg('yellow')])
            print(msg)
    simpleDownloader.cleanCookies()
    return srs
|
||
|
|
||
|
|
||
|
def main():
    """CLI entry point: join argv into one search term and run the search."""
    query = ' '.join(str(arg).strip() for arg in sys.argv[1:]).strip()
    if query:
        do_search(query)
    else:
        print(f'Usage:\n {sys.argv[0]} <search_term>')
|
||
|
|
||
|
|
||
|
# Allow running this module directly as a script.
if __name__ == '__main__':
    main()
|