commit b8629c749021202ade6e7b3d21d22ca8b699a7ab
Author: Ádler Neves
Date:   Fri Dec 29 20:54:22 2017 -0200

    initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1615e04
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+**/*.pyc
+**/__pycache__
+**/__pycache__/**
diff --git a/d/.gitignore b/d/.gitignore
new file mode 100644
index 0000000..b66ba8a
--- /dev/null
+++ b/d/.gitignore
@@ -0,0 +1,3 @@
+**
+!About.md
+!.gitignore
diff --git a/d/About.md b/d/About.md
new file mode 100644
index 0000000..05844bd
--- /dev/null
+++ b/d/About.md
@@ -0,0 +1 @@
+This folder contains all images, grouped by discussion.
diff --git a/i/.gitignore b/i/.gitignore
new file mode 100644
index 0000000..b66ba8a
--- /dev/null
+++ b/i/.gitignore
@@ -0,0 +1,3 @@
+**
+!About.md
+!.gitignore
diff --git a/i/About.md b/i/About.md
new file mode 100644
index 0000000..b8f68f2
--- /dev/null
+++ b/i/About.md
@@ -0,0 +1 @@
+This folder holds the downloaded images, grouped by their reddit discussion id. Not user friendly.
diff --git a/r/.gitignore b/r/.gitignore
new file mode 100644
index 0000000..b66ba8a
--- /dev/null
+++ b/r/.gitignore
@@ -0,0 +1,3 @@
+**
+!About.md
+!.gitignore
diff --git a/r/About.md b/r/About.md
new file mode 100644
index 0000000..6d9fdab
--- /dev/null
+++ b/r/About.md
@@ -0,0 +1,3 @@
+This folder contains one folder per subreddit you want downloaded, named after the subreddit.
+
+As an example, to download "/r/photoshopbattles/" you create a folder named "photoshopbattles".
diff --git a/reddit_imgs/fetch.py b/reddit_imgs/fetch.py
new file mode 100755
index 0000000..1fa795c
--- /dev/null
+++ b/reddit_imgs/fetch.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+
+from .system import downloader as downloaderModule
+import json
+import os
+
+downloaderGetter = downloaderModule.getDownloader
+
+wdir = os.path.abspath('.')
+
+isImageDirectLink = lambda s: s.endswith('.jpg') or s.endswith('.png') or s.endswith('.gif') or s.endswith('.webp')
+
+def main():
+    links = list()
+    subreddits = sorted(filter(lambda sr: os.path.isdir(os.path.join(wdir,'r',sr)), os.listdir(os.path.join(wdir,'r'))))
+    for subreddit in subreddits:
+        srf = os.path.abspath(os.path.join(wdir,'r',subreddit,'subreddit.json'))
+        links2 = list()
+        try:
+            with open(srf) as f:
+                links2 = json.loads(f.read())['links']
+        except: pass
+        links+=links2
+        del links2
+        del srf
+        del subreddit
+    del subreddits
+
+    links.sort(key=lambda link: link['timestamp'])
+
+    medias = dict((('direct_link',list()),))
+    for link in links:
+        if isImageDirectLink(link['link']):
+            medias['direct_link'].append(link)
+            continue
+        if link['domain'] not in medias:
+            medias[link['domain']] = list()
+        medias[link['domain']].append(link)
+        del link
+    del links
+
+    priorities = list()
+    for source, links in sorted(medias.items()):
+        downloaderClass = downloaderGetter(source)
+        if downloaderClass is None:
+            print('No downloader for: {0:<35} | {1:>5} links dropped'.format(source,len(links)))
+            priorities.append((len(links),source))
+            del medias[source]
+            continue
+
+    top_priorities = list(reversed(sorted(priorities)))[:10]
+    prioremain = sum(map(lambda a: a[0], list(reversed(sorted(priorities)))[10:]))
+    priolen = len(priorities)
+    del priorities
+
+    for source, links in sorted(medias.items()):
+        print('Changing downloader for next %d links on %s'%(len(links),source))
+        #if source!='imgur.com': continue
+        downloaderClass = downloaderGetter(source)
+        downloader = downloaderClass()
+        for seq, link in enumerate(links):
+            print('Downloading link #%05d of %05d: %s << %s'%(seq+1, len(links), link['link'], link['datakey']))
+            if not downloader.recognizes(link['link']):
+                continue
+            target = os.path.join(wdir,'i',link['datakey'])
+            if not os.path.exists(target):
+                downloader.download(link['link']).into(target)
+
+    print()
+    print('='*47)
+    print('| {0:^43} |'.format('Missing downloaders'))
+    print('='*47)
+    print('| {0:^30} | {1:^10} |'.format('Domain','Hits'))
+    print('-'*47)
+    for priority in top_priorities:
+        print('| {0:^30} | {1:^10} |'.format(*list(reversed(priority))))
+        del priority
+    del top_priorities
+    print('|'+'.'*32+'|'+'.'*12+'|')
+    print('| {0:^30} | {1:^10} |'.format('...and more %d domains'%(priolen-10), prioremain))
+    del priolen
+    print('='*47)
+    print()
+
+if __name__ == '__main__':
+    main()
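fetch.py resolves one downloader per link domain and drives it through a small implicit interface: works_on() and get_class() at module level, then recognizes() and download() on the instance, whose result is persisted with .into(target). The following minimal module is a sketch of that contract; the example.com domain is hypothetical, and the body simply mirrors the direct_link module added later in this commit.

# reddit_imgs/system/downloader/modules/example_com.py (hypothetical)
from ..downloadedData import DownloadedData
from ... import simpleDownloader

def works_on(domain):
    # fetch.py passes either a hostname or the pseudo-domain 'direct_link'
    return domain == 'example.com'

class ExampleCom(object):
    def recognizes(self, link):
        return True

    def download(self, link):
        dd = DownloadedData()
        bts = simpleDownloader.getUrlBytes(link)
        if bts is not None:
            dd.put(link, bts)
        return dd  # fetch.py then calls .into() with the target folder under i/

def get_class():
    return ExampleCom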
diff --git a/reddit_imgs/reorganize.py b/reddit_imgs/reorganize.py
new file mode 100755
index 0000000..2deb27d
--- /dev/null
+++ b/reddit_imgs/reorganize.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+
+import os
+import re
+import json
+import shutil
+import datetime
+
+def readAllFile(s):
+    with open(s) as f:
+        return f.read()
+def slugify(dat):
+    return re.sub(r'[^\w\s\.\-\(\)\[\]]', '-', dat)
+
+wdir = os.path.abspath('.')
+
+def main():
+    idir = os.path.join(wdir, 'i')
+    ddir = os.path.join(wdir, 'd')
+
+    subreddits = sorted(filter(lambda sr: os.path.isdir(os.path.join(wdir,'r',sr)), os.listdir(os.path.join(wdir,'r'))))
+
+    subreddits = list(zip(
+        subreddits,
+        map(
+            lambda a: a['links'],
+            map(
+                json.loads,
+                map(
+                    readAllFile,
+                    map(
+                        lambda sr: os.path.join(wdir,'r',sr,'subreddit.json'),
+                        subreddits
+                    )))),
+        map(
+            lambda sr: os.path.join(wdir,'d',sr),
+            subreddits
+        )
+    ))
+
+    copyfiles = list()
+
+    print('\r'+' '*79+'\r'+'Calculating changes...',end='')
+
+    for subreddit, links, target in subreddits:
+        sdir = os.path.join(ddir,subreddit)
+        for link in links:
+            imgd = os.path.join(idir, link['datakey'])
+            meta = os.path.join(imgd, 'meta.json')
+            if os.path.exists(meta):
+                files = json.loads(readAllFile(meta))
+                for seq, file in enumerate(files):
+                    imgfrom = os.path.join(imgd, file['dname'])
+                    imgfn = ''
+                    imgfn+= subreddit
+                    imgfn+= '__'
+                    imgfn+= datetime.datetime.fromtimestamp(int(link['timestamp'])).isoformat().replace('T','_').replace(':','-')
+                    imgfn+= '_'
+                    imgfn+= 'nsfw' if link['nsfw'] else 'safe'
+                    imgfn+= '___'
+                    imgfn+= '-' if link['flair'] is None else slugify(link['flair'])
+                    imgfn+= '___'
+                    imgfn+= '-' if link['sharer'] is None else slugify(link['sharer'])
+                    imgfn+= '___'
+                    imgfn+= slugify(link['title'][:50])
+                    imgfn+= '___'
+                    imgfn+= slugify(link['datakey'])
+                    imgfn+= '___'
+                    imgfn+= str('%04d'%seq)
+                    imgfn+= '.'+file['ext']
+                    imgto = os.path.join(sdir,imgfn)
+                    copyfiles.append((imgfrom,imgto))
+        del links
+
+    lcf = len(copyfiles)
+    for (cnt, (src, dst)) in enumerate(copyfiles):
+        if os.path.exists(dst): continue
+        container = os.path.dirname(os.path.abspath(dst))
+        if not os.path.exists(container):
+            os.makedirs(container)
+        print('\r'+' '*79+'\r'+'%03d%% copied: %05d of %05d'%((((cnt+1)/lcf)*100)//1, cnt+1, lcf),end='')
+        try:
+            shutil.copyfile(src, dst)
+        except KeyboardInterrupt as e:
+            print()
+            print('\r'+' '*79+'\r'+'Deleting interrupted file...',end='')
+            os.remove(dst)
+            print('\r'+' '*79+'\r'+'Aborted safely',end='')
+            print()
+            raise e
+
+    print('\r'+' '*79+'\r'+'Done.',end='')
+    print()
+
+if __name__ == '__main__':
+    main()
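The filename that reorganize.py assembles packs the subreddit, timestamp, NSFW flag, flair, sharer, title, datakey and sequence number into one name. Below is a standalone sketch of that scheme; the link record fields match what sync.py stores in subreddit.json, but the sample values are made up.

import re
import datetime

def slugify(dat):
    return re.sub(r'[^\w\s\.\-\(\)\[\]]', '-', dat)

def build_name(subreddit, link, seq, ext):
    stamp = datetime.datetime.fromtimestamp(int(link['timestamp'])).isoformat().replace('T', '_').replace(':', '-')
    return (subreddit + '__' + stamp + '_'
            + ('nsfw' if link['nsfw'] else 'safe') + '___'
            + ('-' if link['flair'] is None else slugify(link['flair'])) + '___'
            + ('-' if link['sharer'] is None else slugify(link['sharer'])) + '___'
            + slugify(link['title'][:50]) + '___'
            + slugify(link['datakey']) + '___'
            + ('%04d' % seq) + '.' + ext)

# Illustrative values only.
print(build_name('photoshopbattles',
                 {'timestamp': 1514582062, 'nsfw': False, 'flair': None,
                  'sharer': 'some_user', 'title': 'PsBattle: a cat', 'datakey': 't3_abcdef'},
                 0, 'jpg'))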
diff --git a/reddit_imgs/runner.py b/reddit_imgs/runner.py
new file mode 100755
index 0000000..ad52f3c
--- /dev/null
+++ b/reddit_imgs/runner.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+
+import reddit_imgs.sync
+import reddit_imgs.fetch
+import reddit_imgs.reorganize
+
+import os
+import shutil
+wdir = os.path.abspath('.')
+
+def ensureFolderAvailability():
+    if not os.path.exists(os.path.join(wdir,'a')):
+        os.makedirs(os.path.join(wdir,'a'))
+    if not os.path.exists(os.path.join(wdir,'d')):
+        os.makedirs(os.path.join(wdir,'d'))
+    if not os.path.exists(os.path.join(wdir,'i')):
+        os.makedirs(os.path.join(wdir,'i'))
+    if not os.path.exists(os.path.join(wdir,'r')):
+        os.makedirs(os.path.join(wdir,'r'))
+
+def managesubreddits():
+    i = ''
+    while i!='0':
+        print('\n'*100)
+        print('----------------------------------------------')
+        print(' Subreddit Manager ')
+        print('----------------------------------------------')
+        print('1) List monitored subreddits')
+        print('2) Add monitored subreddit')
+        print('3) Remove monitored subreddit')
+        print()
+        print('0) Back')
+        print('----------------------------------------------')
+        print()
+        print('Enter your choice:')
+        i = input()
+        i = i.strip()
+        print()
+        print()
+        subreddits_dir = os.path.join(wdir,'r')
+        subreddits_isfolder = lambda sr: os.path.isdir(os.path.join(subreddits_dir,sr))
+        subreddits = sorted(filter(subreddits_isfolder, os.listdir(subreddits_dir)))
+        if i=='1' or i=='3':
+            print('Subreddits monitored:')
+            for sr in subreddits:
+                print('/r/%s'%sr)
+            print()
+            if i=='1':
+                print('Press enter to continue')
+                input()
+            if i=='3':
+                print('Enter the subreddit you want to get rid of:')
+                rem = input('/r/')
+                try: shutil.rmtree(os.path.join(subreddits_dir,rem))
+                except: pass
+                print()
+                print('Done.')
+                print('Press enter to continue')
+                input()
+        elif i=='2':
+            print('Enter the subreddit you want to add:')
+            add = input('/r/')
+            try: os.makedirs(os.path.join(subreddits_dir,add))
+            except: pass
+            print()
+            print('Done.')
+            print('Press enter to continue')
+            input()
+
+def mainmenu():
+    i = ''
+    while i!='0':
+        print('\n'*100)
+        print('----------------------------------------------')
+        print(' Reddit Image Downloader ')
+        print('----------------------------------------------')
+        print('1) Manage subreddits')
+        print('2) Get link list to be downloaded from reddit')
+        print('3) Download grabbed links')
+        print('4) Group and put nice names on downloaded data')
+        print()
+        print('0) Quit')
+        print('----------------------------------------------')
+        print()
+        print('Enter your choice:')
+        i = input()
+        i = i.strip()
+        if i=='1':
+            managesubreddits()
+        elif i=='2':
+            reddit_imgs.sync.main()
+        elif i=='3':
+            reddit_imgs.fetch.main()
+        elif i=='4':
+            reddit_imgs.reorganize.main()
+
+def main():
+    ensureFolderAvailability()
+    mainmenu()
+
+if __name__ == '__main__':
+    main()
+
diff --git a/reddit_imgs/sync.py b/reddit_imgs/sync.py
new file mode 100755
index 0000000..e66d065
--- /dev/null
+++ b/reddit_imgs/sync.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+
+import os
+from bs4 import BeautifulSoup as _BS
+from .system import simpleDownloader
+from .system.subredditTools import getEmptySubredditData, getSubredditPageInfo
+import json
+
+def BeautifulSoup(data): return _BS(data, 'html5lib')
+
+simpleDownloader.setCookies({'over18':1})
+
+wdir = os.path.abspath('.')
+
+def main():
+    subreddits = sorted(filter(lambda sr: os.path.isdir(os.path.join(wdir,'r',sr)), os.listdir(os.path.join(wdir,'r'))))
+    for subreddit in subreddits:
+        srp = os.path.abspath(os.path.join(wdir,'r',subreddit))
+        #if subreddit!='yiff': continue
+        nextpage = 'https://www.reddit.com/r/'+subreddit+'/new/?count=0'
+        srdt = getEmptySubredditData(subreddit)
+        try:
+            with open(os.path.join(srp,'subreddit.json')) as f:
+                srdt = json.loads(f.read())
+        except: pass
+        #srdt = getEmptySubredditData(subreddit)
+        pageno = 0
+        while nextpage:
+            pageno+=1
+            print(('/r/{0:<20} loading page #%05d'%pageno).format(subreddit))
+            print(' >> %s'%nextpage)
+            redditBytes = simpleDownloader.getUrlBytes(nextpage)
+            bs = BeautifulSoup(redditBytes)
+            first, last, nextpage, links = getSubredditPageInfo(bs)
+            if srdt['date_last'] <= last:
+                nextpage = None
+            srdt['date_first'] = max(first, srdt['date_first'])
+            srdt['date_last'] = min(last, srdt['date_last'])
+            for link in links[::-1]:
+                if link not in srdt['links']:
+                    srdt['links'].append(link)
+        with open(os.path.join(srp,'subreddit.json'),'w') as f:
+            f.write(json.dumps(srdt ,sort_keys=True, indent=2))
+
+if __name__ == '__main__':
+    main()
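runner.py only wraps the three pipeline steps in a menu (option 2 runs sync, 3 runs fetch, 4 runs reorganize). A non-interactive sketch of the same flow, assuming it is executed from the working directory that holds the r/, i/ and d/ folders; the subreddit name is only an example:

import os

import reddit_imgs.runner
import reddit_imgs.sync
import reddit_imgs.fetch
import reddit_imgs.reorganize

reddit_imgs.runner.ensureFolderAvailability()      # creates a/, d/, i/, r/ if missing
os.makedirs(os.path.join('r', 'photoshopbattles'), exist_ok=True)

reddit_imgs.sync.main()        # menu option 2: grab the link lists from reddit
reddit_imgs.fetch.main()       # menu option 3: download the grabbed links into i/
reddit_imgs.reorganize.main()  # menu option 4: copy them into d/ under readable names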
diff --git a/reddit_imgs/system/downloader/__init__.py b/reddit_imgs/system/downloader/__init__.py
new file mode 100644
index 0000000..7e34022
--- /dev/null
+++ b/reddit_imgs/system/downloader/__init__.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+
+import os
+
+modules_map = dict()
+
+moduleNames = os.listdir(os.path.join(os.path.dirname(os.path.abspath(__file__)),'modules'))
+moduleNames = list(map(lambda a: a[:-3], filter(lambda a: a.endswith('.py'), moduleNames)))
+
+for moduleName in moduleNames:
+    exec('from .modules import {0} as {0}; modules_map["{0}"] = {0}'.format(moduleName))
+
+def getDownloader(domain):
+    for module in modules_map.values():
+        try:
+            if module.works_on(domain):
+                return module.get_class()
+        except: pass
+    return None
+
diff --git a/reddit_imgs/system/downloader/downloadedData.py b/reddit_imgs/system/downloader/downloadedData.py
new file mode 100644
index 0000000..5dc687c
--- /dev/null
+++ b/reddit_imgs/system/downloader/downloadedData.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+
+import filetype
+import shutil
+import json
+import os
+
+class DownloadedData(object):
+    def __init__(self):
+        self.control = list()
+        self.fb = dict()
+    def put(self, link, downloaded, ext=None):
+        if ext is None:
+            try:
+                ext = link.rsplit('/',1)[-1].rsplit('.',1)[-1]
+                if ext not in ['jpg','png','gif','webp']:
+                    raise Exception
+            except:
+                ext = filetype.guess_extension(downloaded)
+        if ext is None:
+            ext = 'unk'
+        fnm = '%04d.%s'%(len(self.control),ext)
+        self.control.append({
+            'dname': fnm,
+            'link':link,
+            'ext':ext,
+        })
+        self.fb[fnm] = downloaded
+    def into(self, directory):
+        directory = os.path.abspath(directory)
+        if not os.path.exists(directory):
+            os.makedirs(directory)
+        try:
+            with open(os.path.join(directory,'meta.json'),'w') as f:
+                f.write(json.dumps(self.control, sort_keys=True, indent=2))
+            for fnm, dtb in self.fb.items():
+                with open(os.path.join(directory,fnm),'wb') as f:
+                    f.write(dtb)
+        except KeyboardInterrupt as e:
+            shutil.rmtree(directory)
+            raise e
+    def merge(self, other):
+        for oitem in other.control:
+            self.put(oitem['link'], other.fb[oitem['dname']], oitem['ext'])
+    def bulk_merge(self, others):
+        for other in others:
+            self.merge(other)
diff --git a/reddit_imgs/system/downloader/modules/direct_link.py b/reddit_imgs/system/downloader/modules/direct_link.py
new file mode 100644
index 0000000..dc62d5f
--- /dev/null
+++ b/reddit_imgs/system/downloader/modules/direct_link.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+
+from ..downloadedData import DownloadedData
+from ... import simpleDownloader
+
+def works_on(domain):
+    return domain=='direct_link'
+
+class DirectLink(object):
+    def recognizes(self, link):
+        if (
+                link.startswith('http://u18chan.com/')
+                or
+                link.startswith('https://u18chan.com/')
+                or
+                link.startswith('http://dl.dropboxusercontent.com')
+                or
+                link.startswith('https://dl.dropboxusercontent.com')
+        ):
+            return False
+        return True
+
+    def download(self, link):
+        dd = DownloadedData()
+        simpleDownloader.cleanCookies()
+        bts = simpleDownloader.getUrlBytes(link)
+        simpleDownloader.cleanCookies()
+        if bts is not None:
+            dd.put(link,bts)
+        return dd
+
+def get_class():
+    return DirectLink
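DownloadedData is the hand-off object between a downloader module and fetch.py: put() buffers one file in memory and into() flushes everything to the discussion folder together with a meta.json index. A small usage sketch; the links, bytes and target datakey are placeholders:

from reddit_imgs.system.downloader.downloadedData import DownloadedData

dd = DownloadedData()
dd.put('https://example.com/a.jpg', b'<jpeg bytes>')   # extension taken from the URL suffix
dd.put('https://example.com/page', b'<other bytes>')   # falls back to filetype, then to 'unk'
dd.into('i/t3_abcdef')  # writes 0000.jpg, 0001.unk and a meta.json describing both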
diff --git a/reddit_imgs/system/downloader/modules/imgur_com.py b/reddit_imgs/system/downloader/modules/imgur_com.py
new file mode 100644
index 0000000..3a299ab
--- /dev/null
+++ b/reddit_imgs/system/downloader/modules/imgur_com.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+
+import re
+import json
+import filetype
+from ..downloadedData import DownloadedData
+from ... import simpleDownloader
+
+def works_on(domain):
+    return domain in ['i.imgur.com', 'imgur.com', 'm.imgur.com', 'www.imgur.com']
+
+class ImgurCom(object):
+    def recognizes(self, link):
+        return True
+
+    def download(self, link):
+        dd = DownloadedData()
+        simpleDownloader.cleanCookies()
+        bts = b''
+        if '/a/' not in link and '.gifv' not in link and '.webm' not in link:
+            bts = simpleDownloader.getUrlBytes(link)
+        if bts is not None:
+            ext = filetype.guess_extension(bts)
+            if ext is None:
+                if '.gifv' in link or '.webm' in link:
+                    bts=None
+                print(' '*50,end='')
+                print('\r',end='')
+                print(' `--> It wasn\'t a single image...',end='')
+                print('\r',end='')
+                match = re.match(
+                    "(https?)://(www\.)?(i\.|m\.|www\.)?imgur\.com/(?:(a|gallery|r)/)?(\w*)/?(\w*)(#[0-9]+)?(\.\w*)?",
+                    link
+                )
+                tp = match.group(4)
+                ky = None
+                if tp != 'r':
+                    ky = match.group(5)
+                else:
+                    ky = match.group(6)
+                    if not ky:
+                        ky = match.group(5)
+                link2 = 'https://imgur.com/a/'+str(ky)+'/all'
+                if tp is None or tp=='' or tp=='r':
+                    link2=link2.replace('/a/','/')[:-4]
+                print(' '*50,end='')
+                print('\r',end='')
+                if link2.endswith('/all') or bts is None:
+                    print(' `--> Fetching album image list...',end='')
+                    bts = simpleDownloader.getUrlBytes(link2)
+                else:
+                    print(' `--> Album image list already fetched...',end='')
+                print('\r',end='')
+                if bts is None:
+                    print(' '*50,end='')
+                    print('\r',end='')
+                    print(' `--> Gallery not found')
+                    return DownloadedData()
+                html = bts.decode('utf-8')
+                albnfo = json.loads(list(filter(lambda f: f.startswith('image'), map(str.strip, list(filter(lambda f: f.startswith("('gallery', {"), html.split('widgetFactory.mergeConfig')))[0].strip().splitlines())))[0][6:-1].strip()[1:].strip())
+                imgs = [albnfo]
+                if 'album_images' in albnfo:
+                    imgs = albnfo['album_images']['images']
+                for seq, img in enumerate(imgs):
+                    print(' '*50,end='')
+                    print('\r',end='')
+                    print(' `--> Album image #%03d of %03d'%(seq+1,len(imgs)),end='')
+                    print('\r',end='')
+                    if img['ext'] == '.gifv':
+                        img['ext'] = '.mp4'
+                    durl = 'http://i.imgur.com/'+img['hash']+img['ext']
+                    imb = simpleDownloader.getUrlBytes(durl)
+                    if imb is None:
+                        print()
+                        print('Album part failed')
+                        print()
+                        simpleDownloader.cleanCookies()
+                        return None
+                    dd.put(durl, imb, img['ext'][1:])
+                    print('\r',end='')
+            else:
+                dd.put(link, bts, ext)
+        simpleDownloader.cleanCookies()
+        return dd
+
+def get_class():
+    return ImgurCom
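With the two modules above in place, the registry in downloader/__init__.py resolves handlers by domain; anything without a matching works_on() comes back as None and is reported by fetch.py as a missing downloader. A quick sketch, assuming it is run from the repository root so the package imports resolve:

from reddit_imgs.system import downloader as downloaderModule

print(downloaderModule.getDownloader('i.imgur.com'))   # the ImgurCom class
print(downloaderModule.getDownloader('direct_link'))   # the DirectLink class
print(downloaderModule.getDownloader('gfycat.com'))    # None in this commit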
diff --git a/reddit_imgs/system/limits.py b/reddit_imgs/system/limits.py
new file mode 100644
index 0000000..b49207e
--- /dev/null
+++ b/reddit_imgs/system/limits.py
@@ -0,0 +1,5 @@
+#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+
+minint = -2**31
+maxint = 2**31 - 1
\ No newline at end of file
diff --git a/reddit_imgs/system/objectify.py b/reddit_imgs/system/objectify.py
new file mode 100644
index 0000000..c607df7
--- /dev/null
+++ b/reddit_imgs/system/objectify.py
@@ -0,0 +1,7 @@
+#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+
+class objectify(object):
+    @property
+    def json(self): return self.__dict__
+    def __init__(self, data): self.__dict__ = data
\ No newline at end of file
diff --git a/reddit_imgs/system/simpleDownloader.py b/reddit_imgs/system/simpleDownloader.py
new file mode 100644
index 0000000..bca81b2
--- /dev/null
+++ b/reddit_imgs/system/simpleDownloader.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+
+import time
+import urllib.request
+import urllib.error
+
+cookie = dict()
+
+def delCookie(cookiekey):
+    cookiekey = str(cookiekey)
+    del cookie[cookiekey]
+
+def setCookie(cookiekey, cookieval):
+    cookieval = str(cookieval)
+    cookiekey = str(cookiekey)
+    if not cookiekey: return
+    if not cookieval: delCookie(cookiekey)
+    cookie[cookiekey] = cookieval
+
+def getCookies():
+    return dict(cookie.items())
+
+def patchCookies(newCookies):
+    for nk, nv in newCookies.items():
+        setCookie(nk,nv)
+
+def cleanCookies():
+    global cookie
+    cookie = dict()
+
+def setCookies(newCookies):
+    cleanCookies()
+    patchCookies(newCookies)
+
+def getUrlBytes(url):
+    global cookie
+    request = urllib.request.Request(url)
+    request.add_header('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64) '+
+        'AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu '+
+        'Chromium/63.0.3239.84 Chrome/63.0.3239.84 '+
+        'Safari/537.36'
+    )
+    if len(cookie):
+        request.add_header("Cookie", '; '.join(map(lambda a: '='.join(a), cookie.items())))
+    response = None
+    try:
+        response = urllib.request.urlopen(request, timeout=15)
+    except urllib.error.HTTPError as e:
+        if e.code == 429:
+            print('[URL] Got 429 (Too Many Requests): sleeping for 5 seconds')
+            print(' @ %s'%url)
+            time.sleep(5)
+            return getUrlBytes(url)
+        if e.code == 503:
+            print('[URL] Got 503 (Service Temporarily Unavailable): retrying after 5 seconds')
+            print(' @ %s'%url)
+            time.sleep(5)
+            return getUrlBytes(url)
+        elif e.code == 404:
+            return None
+        elif e.code == 400:
+            return None
+        raise e
+    except urllib.error.URLError as e:
+        if str(e.reason).startswith('[Errno -2]'):
+            return None
+        if str(e.reason).startswith('[Errno -3]'):
+            print('Check your internet connection. It seems gone.')
+        if str(e.reason).startswith('[Errno 110]') or str(e.reason)=='timed out':
+            print('Connection request has timed out - assuming "Not Found"')
+            return None
+        if str(e.reason).startswith('[Errno 111]') or str(e.reason)=='timed out':
+            print('Connection refused - assuming "Not Found"')
+            return None
+        raise e
+    rcode = response.getcode()
+    rinfo = response.info()
+    headers = dict()
+    headers_l = list(map(lambda a: list(map(str.strip, a.split(':',1))), str(rinfo).strip().splitlines()))
+    for header in headers_l:
+        k = header[0].lower()
+        v = header[1]
+        if k not in headers:
+            headers[k]=list()
+        headers[k].append(v)
+        del k
+        del v
+        del header
+    del headers_l
+    if 'set-cookie' in headers:
+        for cke in headers['set-cookie']:
+            ckek = cke.split('=',1)[0].strip()
+            ckev = cke.split('=',1)[1].split(';',1)[0].strip()
+            setCookie(ckek,ckev)
+            del ckek
+            del ckev
+            del cke
+    if rcode == 429:
+        tosleep = 5
+        try: tosleep = int(headers['retry-after'][0])
+        except: pass
+        if tosleep < 1: tosleep = 1
+        print('[URL] Got 429 (Too Many Requests): sleeping for %d seconds'%tosleep)
+        print(' @ %s'%url)
+        time.sleep(tosleep)
+        return getUrlBytes(url)
+    data = None
+    if rcode == 200:
+        data = response.read()
+    response.close()
+    return data
+
+def getUrl(url):
+    return getUrlBytes(url).decode('utf-8')
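simpleDownloader is a small urllib wrapper with a module-level cookie jar, automatic retries on 429/503, and None for the errors it treats as "not found". Minimal use, mirroring what sync.py and the downloader modules do; the image URL is a placeholder:

from reddit_imgs.system import simpleDownloader

simpleDownloader.setCookies({'over18': 1})   # same cookie sync.py sets before crawling
listing = simpleDownloader.getUrl('https://www.reddit.com/r/pics/new/?count=0')
raw = simpleDownloader.getUrlBytes('https://i.example.com/some-image.jpg')
if raw is None:
    print('404/400, DNS failure or timeout: treated as not found')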
diff --git a/reddit_imgs/system/subredditTools.py b/reddit_imgs/system/subredditTools.py
new file mode 100644
index 0000000..96de9b2
--- /dev/null
+++ b/reddit_imgs/system/subredditTools.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+
+import dateutil.parser
+from .limits import minint, maxint
+
+def getInfoFromRedditItem(bs):
+    nsfw = 'over18' in bs['class']
+    sharer = bs.find(class_='author').text.strip()
+    title = bs.find('a',class_='title').text.strip()
+    link = str(bs.find('a',class_='title')['href'])
+    domain = bs.find('span',class_='domain').find('a').text.strip()
+    datakey = bs['data-fullname']
+    timestamp = int(dateutil.parser.parse(bs.find('time')['datetime']).strftime('%s'))
+    flair = None
+    try: flair = bs.find('span',class_='linkflairlabel').text.strip()
+    except: pass
+    return {
+        'nsfw': nsfw,
+        'link': link,
+        'title': title,
+        'flair': flair,
+        'sharer': sharer,
+        'domain': domain,
+        'datakey': datakey,
+        'timestamp': timestamp,
+    }
+
+def getEmptySubredditData(srname):
+    return {
+        'subreddit': srname,
+        'date_first': minint,
+        'date_last': maxint,
+        'links': list()
+    }
+
+def getSubredditPageInfo(bs):
+    pagetable = bs.find(id='siteTable')
+    discussions = pagetable.find_all(
+        lambda a: a.has_attr('class') and
+        'thing' in a['class']
+    )
+    links = list(filter(lambda a: 'self' not in a['class'],discussions))
+    first = minint
+    last = maxint
+    try: first = int(dateutil.parser.parse(discussions[0].find('time')['datetime']).strftime('%s'))
+    except: pass
+    try: last = int(dateutil.parser.parse(discussions[-1].find('time')['datetime']).strftime('%s'))
+    except: pass
+    nextpage = None
+    try: nextpage = bs.find('div', class_='nav-buttons').find(class_='nextprev').find(class_='next-button').find('a')['href']
+    except: pass
+    structured_links = list(map(getInfoFromRedditItem, links))
+    return first, last, nextpage, structured_links
diff --git a/redditgetter.py b/redditgetter.py
new file mode 100755
index 0000000..c7c4914
--- /dev/null
+++ b/redditgetter.py
@@ -0,0 +1,7 @@
+#!/usr/bin/env python3
+# -*- encoding: utf-8 -*-
+
+import reddit_imgs.runner
+
+if __name__ == '__main__':
+    reddit_imgs.runner.main()
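subredditTools turns one old-reddit listing page into structured link records, and sync.py folds those into subreddit.json. A sketch of that single-page step in isolation, using the same URL pattern sync.py builds (the subreddit name is just a sample):

from bs4 import BeautifulSoup
from reddit_imgs.system import simpleDownloader
from reddit_imgs.system.subredditTools import getEmptySubredditData, getSubredditPageInfo

simpleDownloader.setCookies({'over18': 1})
page = simpleDownloader.getUrlBytes('https://www.reddit.com/r/photoshopbattles/new/?count=0')
bs = BeautifulSoup(page, 'html5lib')

srdt = getEmptySubredditData('photoshopbattles')
first, last, nextpage, links = getSubredditPageInfo(bs)
srdt['date_first'] = max(first, srdt['date_first'])
srdt['date_last'] = min(last, srdt['date_last'])
srdt['links'].extend(links)
print('%d links, next page: %s' % (len(links), nextpage))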
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..8ae6ccb
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+filetype==1.0.0