update

keep-around/926d9e08ad2d0a3eb7e68386c5f31019b8e6aa6b
Adler Neves, 3 years ago
commit ac0ab46929
15 changed files with 330 additions and 37 deletions
1.  +2   -0   .gitignore
2.  +1   -1   d/About.md
3.  +24  -28  reddit_imgs/reorganize.py
4.  +63  -3   reddit_imgs/runner.py
5.  +2   -1   reddit_imgs/sync.py
6.  +16  -1   reddit_imgs/system/downloader/downloadedData.py
7.  +18  -1   reddit_imgs/system/downloader/modules/direct_link.py
8.  +22  -0   reddit_imgs/system/downloader/modules/imgur_com.py
9.  +18  -2   reddit_imgs/system/simpleDownloader.py
10. +22  -0   reddit_imgs/system/subredditTools.py
11. +7   -0   reddit_imgs/system/textTools.py
12. +130 -0   reddit_imgs/wallpapers.py
13. +1   -0   requirements.txt
14. +3   -0   w/.gitignore
15. +1   -0   w/About.md

+2 -0  .gitignore

@@ -1,3 +1,5 @@
tmp
tmp/**
**/*.pyc
**/__pycache__
**/__pycache__/**

+1 -1  d/About.md

@@ -1 +1 @@
This folder contains all images grouped by discussions.
This folder contains all images grouped by subreddit.

+24 -28  reddit_imgs/reorganize.py

@@ -2,16 +2,13 @@
# -*- encoding: utf-8 -*-
import os
import re
import json
import shutil
import datetime
from .system import subredditTools
def readAllFile(s):
    with open(s) as f:
        return f.read()
def slugify(dat):
    return re.sub(r'[^\w\s\.\-\(\)\[\]]', '-', dat)
wdir = os.path.abspath('.')
@@ -39,6 +36,10 @@ def main():
            )
        ))
    linksDown = 0
    linksNotDown = 0
    linksErr = 0
    copyfiles = list()
    print('\r'+' '*79+'\r'+'Calculating changes...',end='')
@@ -48,29 +49,19 @@ def main():
        for link in links:
            imgd = os.path.join(idir, link['datakey'])
            meta = os.path.join(imgd, 'meta.json')
            if os.path.exists(meta):
            if not os.path.exists(meta):
                linksNotDown+=1
            else:
                files = json.loads(readAllFile(meta))
                for seq, file in enumerate(files):
                    imgfrom = os.path.join(imgd, file['dname'])
                    imgfn = ''
                    imgfn+= subreddit
                    imgfn+= '__'
                    imgfn+= datetime.datetime.fromtimestamp(int(link['timestamp'])).isoformat().replace('T','_').replace(':','-')
                    imgfn+= '_'
                    imgfn+= 'nsfw' if link['nsfw'] else 'safe'
                    imgfn+= '___'
                    imgfn+= '-' if link['flair'] is None else slugify(link['flair'])
                    imgfn+= '___'
                    imgfn+= '-' if link['sharer'] is None else slugify(link['sharer'])
                    imgfn+= '___'
                    imgfn+= slugify(link['title'][:50])
                    imgfn+= '___'
                    imgfn+= slugify(link['datakey'])
                    imgfn+= '___'
                    imgfn+= str('%04d'%seq)
                    imgfn+= '.'+file['ext']
                    imgto = os.path.join(sdir,imgfn)
                    copyfiles.append((imgfrom,imgto))
                if len(files)>0:
                    linksDown+=1
                    for seq, file in enumerate(files):
                        imgfrom = os.path.join(imgd, file['dname'])
                        imgfn = subredditTools.assembleFileName(subreddit, link, seq, file['ext'])
                        imgto = os.path.join(sdir,imgfn)
                        copyfiles.append((imgfrom,imgto))
                else:
                    linksErr+=1
        del links
    lcf = len(copyfiles)
@@ -89,9 +80,14 @@ def main():
            print('\r'+' '*79+'\r'+'Aborted safely',end='')
            print()
            raise e
    print('\r'+' '*79+'\r'+'Done.',end='')
    print('\r'+' '*79+'\r'+'100% copied')
    print('%05d files successfully downloaded'%lcf)
    print('%05d links downloaded'%linksDown)
    print('%05d links errored'%linksErr)
    print('%05d links ignored'%linksNotDown)
    print('%05d links total'%(linksDown+linksNotDown+linksErr))
    print()
    print('Done.')
if __name__ == '__main__':
    main()
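
A hedged restatement of the new accounting: each link lands in exactly one of the three counters. classifyLink below is a hypothetical helper sketch, not part of the commit:

    def classifyLink(imgd):
        meta = os.path.join(imgd, 'meta.json')
        if not os.path.exists(meta):
            return 'linksNotDown'  # never fetched
        files = json.loads(readAllFile(meta))
        return 'linksDown' if len(files) > 0 else 'linksErr'  # fetched vs. fetch failed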

+63 -3  reddit_imgs/runner.py

@@ -4,12 +4,16 @@
import reddit_imgs.sync
import reddit_imgs.fetch
import reddit_imgs.reorganize
import reddit_imgs.wallpapers
import os
import sys
import shutil
wdir = os.path.abspath('.')
def ensureFolderAvailability():
    if not os.path.exists(os.path.join(wdir,'w')):
        os.makedirs(os.path.join(wdir,'w'))
    if not os.path.exists(os.path.join(wdir,'d')):
        os.makedirs(os.path.join(wdir,'d'))
    if not os.path.exists(os.path.join(wdir,'i')):
@@ -27,6 +31,8 @@ def managesubreddits():
        print('1) List monitored subreddits')
        print('2) Add monitored subreddit')
        print('3) Remove monitored subreddit')
        print('4) Set as wallpaper source')
        print('5) Unset as wallpaper source')
        print()
        print('0) Back')
        print('----------------------------------------------')
@@ -39,10 +45,14 @@ def managesubreddits():
        subreddits_dir = os.path.join(wdir,'r')
        subreddits_isfolder = lambda sr: os.path.isdir(os.path.join(subreddits_dir,sr))
        subreddits = sorted(filter(subreddits_isfolder, os.listdir(subreddits_dir)))
        if i=='1' or i=='3':
        if i in ['1', '3', '4', '5']:
            print('Subreddits monitored:')
            for sr in subreddits:
                print('/r/{0}'.format(sr),end='')
                print('/r/%s'%sr)
                if os.path.isfile(os.path.join(subreddits_dir,sr,'wallpaper.flag')):
                    print('\t\t(wallpaper)')
                else:
                    print()
            print()
        if i=='1':
            print('Press enter to continue')
@@ -65,6 +75,37 @@ def managesubreddits():
            print('Done.')
            print('Press enter to continue')
            input()
        elif i=='4':
            print('Enter the subreddit you want to set as wallpaper source:')
            add = input('/r/')
            try:
                dd = os.path.join(subreddits_dir,add)
                if not os.path.exists(dd):
                    os.makedirs(dd)
                f = open(os.path.join(dd, 'wallpaper.flag'),'w')
                f.write('')
                f.close()
            except: pass
            print()
            print('Done.')
            print('Press enter to continue')
            input()
        elif i=='5':
            print('Enter the subreddit you want to unset as wallpaper source:')
            add = input('/r/')
            try:
                dd = os.path.join(subreddits_dir,add)
                if not os.path.exists(dd):
                    os.makedirs(dd)
                f = open(os.path.join(dd, 'wallpaper.flag'),'w')
                f.write('')
                f.close()
                os.remove(os.path.join(dd, 'wallpaper.flag'))
            except: pass
            print()
            print('Done.')
            print('Press enter to continue')
            input()
def mainmenu():
    i = ''
@@ -77,6 +118,7 @@ def mainmenu():
        print('2) Get link list to be downloaded from reddit')
        print('3) Download grabbed links')
        print('4) Group and put nice names on downloaded data')
        print('5) Separate wallpapers')
        print()
        print('0) Quit')
        print('----------------------------------------------')
@@ -92,10 +134,28 @@ def mainmenu():
            reddit_imgs.fetch.main()
        elif i=='4':
            reddit_imgs.reorganize.main()
        elif i=='5':
            reddit_imgs.wallpapers.main()

def main():
    ensureFolderAvailability()
    mainmenu()
    if len(sys.argv)>1:
        cmdline()
    else:
        mainmenu()

def cmdline():
    cmd = sys.argv[1]
    if cmd == 'sync':
        reddit_imgs.sync.main()
    elif cmd == 'fetch':
        reddit_imgs.fetch.main()
    elif cmd == 'reorganize':
        reddit_imgs.reorganize.main()
    elif cmd == 'wallpapers':
        reddit_imgs.wallpapers.main()
    else:
        print('Usage: {0} [sync/fetch/reorganize/wallpapers]'.format(sys.argv[0]))

if __name__ == '__main__':
    main()
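
Usage sketch for the new non-interactive mode (the module invocation is an assumption from the repo layout; only the four subcommands handled in cmdline() exist):

    # interactive menu, as before:
    #   python3 -m reddit_imgs.runner
    # scripted, one stage per call:
    #   python3 -m reddit_imgs.runner sync
    #   python3 -m reddit_imgs.runner wallpapers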


+2 -1  reddit_imgs/sync.py

@@ -26,6 +26,7 @@ def main():
        except: pass
        #srdt = getEmptySubredditData(subreddit)
        pageno = 0
        ygst = srdt['date_first']
        while nextpage:
            pageno+=1
            print(('/r/{0:<20} loading page #%05d'%pageno).format(subreddit))
@@ -33,7 +34,7 @@ def main():
            redditBytes = simpleDownloader.getUrlBytes(nextpage)
            bs = BeautifulSoup(redditBytes)
            first, last, nextpage, links = getSubredditPageInfo(bs)
            if srdt['date_last'] <= last:
            if ygst >= first:  # if the newest stored post is at least as recent as the newest one fetched, we are up-to-date
                nextpage = None
            srdt['date_first'] = max(first, srdt['date_first'])
            srdt['date_last'] = min(last, srdt['date_last'])
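
A toy illustration of the corrected stop condition, with invented epoch timestamps:

    ygst = 1517000000    # newest post already stored (srdt['date_first'])
    first = 1516990000   # newest post on the freshly fetched page
    if ygst >= first:    # nothing newer than what we already have
        nextpage = None  # stop paginating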


+16 -1  reddit_imgs/system/downloader/downloadedData.py

@@ -7,9 +7,24 @@ import json
import os
class DownloadedData(object):
    def __init__(self):
    def __init__(self, loadfrom = None):
        self.initialize()
        self.loadfrom(loadfrom)
    def initialize(self):
        self.control = list()
        self.fb = dict()
    def loadfrom(self,loadfrom):
        if loadfrom:
            with open(os.path.join(loadfrom,'meta.json')) as f:
                self.control = json.loads(f.read())
            for ctrl in self.control:
                fnm = ctrl['dname']
                cnt = b''
                with open(os.path.join(loadfrom,fnm),'rb') as f:
                    cnt = f.read()
                self.fb[fnm] = cnt
    def storedLinks(self):
        return [ctrl['link'] for ctrl in self.control]
    def put(self, link, downloaded, ext=None):
        if ext is None:
            try:
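
A minimal resume sketch, assuming a tmp folder left behind by an earlier interrupted run (as imgur_com.py below produces):

    from reddit_imgs.system.downloader.downloadedData import DownloadedData
    dd = DownloadedData(loadfrom='tmp')  # re-reads tmp/meta.json plus every file it lists
    skip = set(dd.storedLinks())         # links that need no re-download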


+18 -1  reddit_imgs/system/downloader/modules/direct_link.py

@@ -17,14 +17,31 @@ class DirectLink(object):
                link.startswith('http://dl.dropboxusercontent.com')
                or
                link.startswith('https://dl.dropboxusercontent.com')
                or
                link.startswith('http://pawsru.org')
                or
                link.startswith('https://pawsru.org')
        ):
            return False
        return True
    def needsPromiscuity(self, link):
        if (
                link.startswith('http://cdn.discordapp.com')
                or
                link.startswith('https://cdn.discordapp.com')
                or
                link.startswith('http://www.weasyl.com')
                or
                link.startswith('https://www.weasyl.com')
        ):
            return True
        return False
    def download(self, link):
        dd = DownloadedData()
        simpleDownloader.cleanCookies()
        bts = simpleDownloader.getUrlBytes(link)
        bts = simpleDownloader.getUrlBytes(link, self.needsPromiscuity(link))
        simpleDownloader.cleanCookies()
        if bts is not None:
            dd.put(link,bts)
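
A small sketch of how the new flag flows into the downloader (URL invented for illustration):

    dl = DirectLink()
    link = 'https://cdn.discordapp.com/attachments/1/2/3.png'
    bts = simpleDownloader.getUrlBytes(link, dl.needsPromiscuity(link))
    # needsPromiscuity(link) is True here, so a 403 comes back as None instead of raising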


+22 -0  reddit_imgs/system/downloader/modules/imgur_com.py

@@ -1,8 +1,10 @@
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
import os
import re
import json
import shutil
import filetype
from ..downloadedData import DownloadedData
from ... import simpleDownloader
@@ -62,6 +64,20 @@ class ImgurCom(object):
                imgs = [albnfo]
                if 'album_images' in albnfo:
                    imgs = albnfo['album_images']['images']
                intermediarySaves = len(imgs)>=10
                if intermediarySaves:
                    if not os.path.isdir('tmp'):
                        os.makedirs('tmp')
                    if os.path.isfile('tmp/link.url'):
                        with open('tmp/link.url') as f:
                            svdlnk = f.read()
                        if svdlnk == link:
                            dd.loadfrom('tmp')
                        else:
                            shutil.rmtree('tmp')
                            os.makedirs('tmp')
                    with open('tmp/link.url', 'w') as f:
                        f.write(link)
                for seq, img in enumerate(imgs):
                    print(' '*50,end='')
                    print('\r',end='')
@@ -70,6 +86,8 @@ class ImgurCom(object):
                    if img['ext'] == '.gifv':
                        img['ext'] = '.mp4'
                    durl = 'http://i.imgur.com/'+img['hash']+img['ext']
                    if durl in dd.storedLinks():
                        continue
                    imb = simpleDownloader.getUrlBytes(durl)
                    if imb is None:
                        print()
@@ -78,7 +96,11 @@ class ImgurCom(object):
                        simpleDownloader.cleanCookies()
                        return None
                    dd.put(durl, imb, img['ext'][1:])
                    if intermediarySaves and seq%10 == 0:
                        dd.into('tmp')
                print('\r',end='')
                if os.path.isdir('tmp'):
                    shutil.rmtree('tmp')
            else:
                dd.put(link, bts, ext)
            simpleDownloader.cleanCookies()
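
The resume test above, condensed into a hypothetical helper (not part of the commit) for clarity:

    import os
    def canResume(link, tmpdir='tmp'):
        urlfile = os.path.join(tmpdir, 'link.url')
        if not os.path.isfile(urlfile):
            return False
        with open(urlfile) as f:
            return f.read() == link  # same album as the interrupted run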


+18 -2  reddit_imgs/system/simpleDownloader.py

@@ -33,7 +33,7 @@ def setCookies(newCookies):
    cleanCookies()
    patchCookies(newCookies)

def getUrlBytes(url):
def getUrlBytes(url, giveUpOn403=False):
    global cookie
    request = urllib.request.Request(url)
    request.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64) '+
@@ -45,7 +45,7 @@ def getUrlBytes(url):
    request.add_header("Cookie", '; '.join(map(lambda a: '='.join(a), cookie.items())))
    response = None
    try:
        response = urllib.request.urlopen(request, timeout=15)
        response = urllib.request.urlopen(request, timeout=30)
    except urllib.error.HTTPError as e:
        if e.code == 429:
            print('[URL] Got 429 (Too Many Requests): sleeping for 5 seconds')
@@ -57,12 +57,28 @@ def getUrlBytes(url):
            print(' @ %s'%url)
            time.sleep(5)
            return getUrlBytes(url)
        if e.code == 403 and giveUpOn403:
            print('[URL] Got 403 (Forbidden): assuming "Not Found"')
            print(' @ %s'%url)
            return None
        elif e.code == 500:
            print('[URL] Got 500 (Server Error): assuming "Not Found"')
            return None
        elif e.code == 404:
            return None
        elif e.code == 400:
            return None
        raise e
    except urllib.error.URLError as e:
        if str(e.reason).startswith('EOF occurred in violation of protocol ('):
            print('Server doesn\'t know how to use HTTP properly - assuming "Not Found"')
            return None
        if str(e.reason).startswith('[SSL: CERTIFICATE'):
            print('Their SSL certificate is screwed up - assuming "Not Found"')
            return None
        if str(e.reason).startswith('[Errno -5]'):
            print('Their DNS server is screwed up - assuming "Not Found"')
            return None
        if str(e.reason).startswith('[Errno -2]'):
            return None
        if str(e.reason).startswith('[Errno -3]'):
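
A hedged summary of the error policy after this change, from a caller's point of view:

    bts = getUrlBytes('https://example.com/a.png', giveUpOn403=True)
    if bts is None:
        pass  # 400/404/500, broken SSL/DNS, or (with the flag) 403: treated as not found
    # 429 sleeps 5 seconds and retries; other HTTP errors still raise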


+22 -0  reddit_imgs/system/subredditTools.py

@@ -1,8 +1,10 @@
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
import datetime
import dateutil.parser
from .limits import minint, maxint
from .textTools import slugify
def getInfoFromRedditItem(bs):
    nsfw = 'over18' in bs['class']
@@ -52,3 +54,23 @@ def getSubredditPageInfo(bs):
    except: pass
    structured_links = list(map(getInfoFromRedditItem, links))
    return first, last, nextpage, structured_links

def assembleFileName(subreddit,link,seq,ext):
    imgfn = ''
    imgfn+= subreddit
    imgfn+= '__'
    imgfn+= datetime.datetime.fromtimestamp(int(link['timestamp'])).isoformat().replace('T','_').replace(':','-')
    imgfn+= '_'
    imgfn+= 'nsfw' if link['nsfw'] else 'safe'
    imgfn+= '___'
    imgfn+= '-' if link['flair'] is None else slugify(link['flair'])
    imgfn+= '___'
    imgfn+= '-' if link['sharer'] is None else slugify(link['sharer'])
    imgfn+= '___'
    imgfn+= slugify(link['title'][:50])
    imgfn+= '___'
    imgfn+= slugify(link['datakey'])
    imgfn+= '___'
    imgfn+= str('%04d'%seq)
    imgfn+= '.'+ext
    return imgfn
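
A worked example of the name scheme (values invented; the timestamp is rendered in local time, shown here as UTC):

    link = {'timestamp': 1514764800, 'nsfw': False, 'flair': None,
            'sharer': 'someuser', 'title': 'First sunrise of 2018', 'datakey': 'abc123'}
    assembleFileName('EarthPorn', link, 0, 'jpg')
    # -> 'EarthPorn__2018-01-01_00-00-00_safe___-___someuser___First sunrise of 2018___abc123___0000.jpg'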

+7 -0  reddit_imgs/system/textTools.py

@@ -0,0 +1,7 @@
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
import re
def slugify(dat):
    return re.sub(r'[^\w\s\.\-\(\)\[\]]', '-', dat)
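
Quick check of what survives: word characters, whitespace, and .-()[] pass through, everything else becomes '-':

    slugify('Who? Me! [WIP] (v2)')  # -> 'Who- Me- [WIP] (v2)'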

+130 -0  reddit_imgs/wallpapers.py

@@ -0,0 +1,130 @@
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
import os
import json
import shutil
import filetype
import PIL.Image
from .system import subredditTools

def readAllFile(d):
    with open(d) as f:
        return f.read()

proportion = (
    5/4,
    21/9
)
minPixels = 1000**2

wdir = os.path.abspath('.')
idir = os.path.join(wdir, 'i')
pdir = os.path.join(wdir, 'w')
rdir = os.path.join(wdir, 'r')

def main():
    subreddits = (
        map(
            lambda sr: json.loads(readAllFile(os.path.join(rdir,sr,'subreddit.json'))),
            sorted(
                filter(
                    lambda sr:
                        os.path.isdir(os.path.join(rdir,sr))
                        and
                        os.path.isfile(os.path.join(rdir,sr,'subreddit.json'))
                        and
                        os.path.isfile(os.path.join(rdir,sr,'wallpaper.flag')),
                    os.listdir(rdir)
                )
            )
        ))
    pass
    copyfiles = list()
    linksDown = 0
    linksNotDown = 0
    linksErr = 0
    print('Listing files...')
    for subreddit in subreddits:
        sdir = pdir
        for link in subreddit['links']:
            imgd = os.path.join(idir, link['datakey'])
            meta = os.path.join(imgd, 'meta.json')
            if not os.path.exists(meta):
                linksNotDown+=1
            else:
                files = json.loads(readAllFile(meta))
                if len(files)<=0:
                    linksErr+=1
                else:
                    linksDown+=1
                    for seq, file in enumerate(files):
                        imgfrom = os.path.join(imgd, file['dname'])
                        ext = filetype.guess_extension(imgfrom)
                        imgfn = subredditTools.assembleFileName(
                            subreddit['subreddit'],
                            link,
                            seq,
                            ext
                        )
                        nsfwsafe = 'nsfw' if link['nsfw'] else 'safe'
                        imgto = os.path.join(sdir,nsfwsafe,imgfn)
                        copyfiles.append((imgfrom,imgto))
    print('Creating folders...')
    lcf = len(copyfiles)
    for (cnt, (src, dst)) in enumerate(copyfiles):
        container = os.path.dirname(os.path.abspath(dst))
        if not os.path.exists(container):
            os.makedirs(container)
    print('Ensuring minimum resolution and proportion...')
    ignored=0
    kept=0
    lcf = len(copyfiles)
    print('\r'+' '*79+'\r'+'%03d%% processed: %05d of %05d'%(0, 0, lcf),end='')
    for (cnt, (src, dst)) in reversed(list(enumerate(copyfiles))):
        if os.path.exists(dst): continue
        print('\r'+' '*79+'\r'+'%03d%% processed: %05d of %05d'%((((lcf-cnt)/lcf)*100)//1, lcf-cnt, lcf),end='')
        with PIL.Image.open(src) as img:
            width, height = img.size
        prop = width/height
        pxls = width*height
        if not (pxls >= minPixels and prop >= proportion[0] and prop <= proportion[1]):
            ignored+=1
            del copyfiles[cnt]
        else:
            kept+=1
    print()
    print('Copying files...')
    lcf = len(copyfiles)
    print('\r'+' '*79+'\r'+'%03d%% copied: %05d of %05d'%(0, 0, lcf),end='')
    for (cnt, (src, dst)) in enumerate(copyfiles):
        if os.path.exists(dst): continue
        print('\r'+' '*79+'\r'+'%03d%% copied: %05d of %05d'%((((cnt+1)/lcf)*100)//1, cnt+1, lcf),end='')
        try:
            shutil.copyfile(src, dst)
        except KeyboardInterrupt as e:
            print()
            print('\r'+' '*79+'\r'+'Deleting interrupted file...',end='')
            os.remove(dst)
            print('\r'+' '*79+'\r'+'Aborted safely',end='')
            print()
            raise e
    print()
    print()
    print('{0:>5} files were kept'.format(kept))
    print('{0:>5} files were ignored'.format(ignored))

if __name__ == '__main__':
    main()
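
A worked check of the resolution/proportion gate for a common 1920x1080 candidate:

    proportion = (5/4, 21/9)  # ~1.25 to ~2.33, as above
    minPixels = 1000**2
    width, height = 1920, 1080
    prop = width/height       # ~1.78, inside the band
    pxls = width*height       # 2073600 >= 1000000
    keep = pxls >= minPixels and proportion[0] <= prop <= proportion[1]  # True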

+1 -0  requirements.txt

@@ -1,2 +1,3 @@
filetype==1.0.0
beautifulsoup4==4.6.0
Pillow==5.0.0

+3 -0  w/.gitignore

@@ -0,0 +1,3 @@
**
!About.md
!.gitignore

+1 -0  w/About.md

@@ -0,0 +1 @@
This folder contains all wallpapers grouped by safety.
