Carlos Aguni

Highly motivated self-taught IT analyst. Always learning and ready to explore new skills. An eternal apprentice.


Poor nosql db

21 Dec 2021 »
from flask import Flask, request, jsonify, json, abort, redirect, url_for, render_template, send_file
from flask_cors import CORS, cross_origin
import os
import re
import io
import subprocess
import traceback
import flatdict
import shutil
import uuid
import threading
import base64
import time
import magic #python-magic

# libmagic-backed detector configured to return MIME type strings; it is used
# on freshly uploaded files in r_upfile().
mime = magic.Magic(mime=True)

# Flask application; templates are looked up in the "template" folder.
app = Flask(__name__, template_folder='template')
# Wrap the app with the flask_cors CORS extension using its default settings.
cors = CORS(app)


# Storage roots: `rootfolder` holds one sub-directory per collection (one file
# per document), `metafolder` holds one metadata file per collection.
rootfolder = "./data"
metafolder = "./metadata"
# exist_ok=True replaces the old "check, then mkdir" sequence, which could
# raise FileExistsError when several workers import this module at once.
os.makedirs(rootfolder, exist_ok=True)
os.makedirs(metafolder, exist_ok=True)

def genserial():
    """Return a fresh random identifier: the 32-character hex form of a UUID4."""
    fresh = uuid.uuid4()
    return fresh.hex

def sync(delay=5):
    """Placeholder background sync job: wait `delay` seconds, then "sync".

    The actual sync step is still a stub that only prints a message.

    Args:
        delay: Seconds to sleep before the sync step. Defaults to 5, the
            pause the code has always used.
    """
    # The previous message announced a 30 s sleep while the code slept 5 s;
    # print the real value so the log matches what actually happens.
    print(f"start sync work sleep {delay}")
    time.sleep(delay)
    print("do sync")

# Handle of the background sync thread. updatedoc() starts a new thread
# whenever this is None or the previous thread is no longer alive.
thread = None

def save_json(doc, filename):
    """Serialize `doc` as JSON and write it to `filename`.

    Values the JSON encoder cannot handle natively are written as their
    `str()` form, except `flatdict.FlatDict` instances, which are written as
    empty objects.

    The document is serialized *before* the file is opened: opening with "w"
    truncates the file, so serializing afterwards would wipe the previous
    contents whenever serialization fails (unsupported key types, circular
    references, ...).

    Args:
        doc: Object to persist.
        filename: Path of the file to create or overwrite.

    Raises:
        TypeError, ValueError: Propagated from the JSON encoder; in that case
            an existing file is left untouched.
    """
    def fallback(value):
        return {} if isinstance(value, flatdict.FlatDict) else str(value)

    payload = json.dumps(doc, default=fallback)
    with open(filename, "w") as f:
        f.write(payload)

def compress(doc):
    """Return `doc` flattened by `flatdict.FlatDict` and made JSON-plain.

    The flattened mapping is round-tripped through JSON so that every value
    ends up as a plain JSON type: leftover `FlatDict` values become `{}` and
    anything else the encoder rejects becomes its `str()` form.
    """
    def _jsonable(value):
        if isinstance(value, flatdict.FlatDict):
            return {}
        return str(value)

    flat = dict(flatdict.FlatDict(doc))
    encoded = json.dumps(flat, default=_jsonable)
    return json.loads(encoded)

def parse(doc):
    """Wrap `doc` in a `flatdict.FlatDict` and return its `as_dict()` form."""
    flat = flatdict.FlatDict(doc)
    return flat.as_dict()

def listcols():
    """Return the names of all entries directly under `rootfolder`."""
    collections = os.listdir(rootfolder)
    return collections

def listdocs(col):
    """Return the document keys recorded in the metadata of collection `col`."""
    return [*getmeta(col).values()]

def getdoc(col, dockey, params=None):
    """Load one document of a collection, optionally projecting its fields.

    Args:
        col: Collection name.
        dockey: Document key as recorded in the collection metadata.
        params: Optional mapping of request parameters. `params['keys']`, when
            present, is a JSON object string mapping top-level field names to
            0 or 1:
              * all values 0 -> the listed fields are dropped;
              * all values 1, or a mix of 0 and 1 -> only the fields flagged
                1 are kept.
            Any other combination of values leaves the document unfiltered.

    Returns:
        The document as a nested dict, or `{}` when the key is unknown or the
        document file does not exist.
    """
    # None instead of a shared mutable `{}` default.
    params = {} if params is None else params

    meta = getmeta(col)
    # Metadata maps serial -> dockey; invert it to find the file for a key.
    metarev = {v: k for k, v in meta.items()}
    if dockey not in metarev:
        return {}

    filepath = f"{rootfolder}/{col}/{metarev[dockey]}"
    if not os.path.exists(filepath):
        return {}

    # Context manager so the handle is closed deterministically (the previous
    # bare open().read() left that to the garbage collector).
    with open(filepath) as f:
        ret = parse(json.loads(f.read()))

    pkeys = json.loads(params.get('keys', '{}'))
    if pkeys:
        flags = set(pkeys.values())
        if flags == {0}:
            ret = {k: v for k, v in ret.items() if k not in pkeys}
        elif flags in ({1}, {0, 1}):
            # With all-1 flags "listed" and "flagged 1" coincide, so both
            # inclusion cases share one expression.
            ret = {k: v for k, v in ret.items() if pkeys.get(k) == 1}
    return ret

def getmeta(col):
    """Return the metadata mapping (serial -> document key) of a collection.

    Args:
        col: Collection name.

    Returns:
        The parsed metadata dict, or `{}` when the collection has no metadata
        file.
    """
    filepath = f"{metafolder}/{col}"
    # isfile() rather than exists(): an empty `col` resolves to the metadata
    # directory itself, which exists but cannot be opened as a file.
    if not os.path.isfile(filepath):
        return {}
    # Context manager so the file handle is always closed.
    with open(filepath) as f:
        return json.loads(f.read())

def updatedoc(col, doc, params=None):
    """Create or update a document in collection `col` and persist it.

    The incoming document is flattened and merged over the stored version (if
    any); incoming fields win. A document without a key (missing, `None`, or
    the literal `'<genserial>'`) gets its generated serial as key.

    Args:
        col: Collection name.
        doc: Mapping with the fields to store; `doc['key']` selects the
            document to update. The caller's mapping is not modified.
        params: Optional request parameters, forwarded to `getdoc` when the
            stored version is loaded.

    Returns:
        The merged document in nested form.

    Raises:
        TypeError: If `doc` is not a mapping (e.g. the request had no JSON
            body).
    """
    from collections.abc import Mapping

    global thread
    if not isinstance(doc, Mapping):
        raise TypeError(
            f"document must be a mapping, got {type(doc).__name__}")
    # Normalise here so the value handed to getdoc() is always a dict.
    params = {} if params is None else params

    meta = getmeta(col)
    metarev = {v: k for k, v in meta.items()}

    # Work on a local key instead of writing a placeholder into the caller's
    # dict; the final "key" entry of the stored document is set below anyway.
    dockey = doc.get('key')
    if dockey is None:
        dockey = '<genserial>'
    print("updatedoc", doc)

    if dockey in metarev:
        serial = metarev[dockey]
    else:
        serial = genserial()
        while os.path.exists(f"{rootfolder}/{col}/{serial}"):
            serial = genserial()

    if dockey == "<genserial>":
        dockey = serial

    doc = compress(doc)
    print("compressdoc", doc)
    # Load the stored version once (it used to be read a second time only to
    # be printed).
    # NOTE(review): `params` is forwarded, so a 'keys' projection also trims
    # the stored fields that take part in the merge - confirm this is intended.
    existing = getdoc(col, dockey, params)
    print(existing)
    doc2 = compress(existing)
    print("compressdoc2", doc2)
    fdoc = {
        **doc2,
        **doc,
        "key": dockey,
    }
    meta[serial] = dockey
    # exist_ok avoids the exists()/mkdir() race between concurrent requests.
    os.makedirs(f"{rootfolder}/{col}", exist_ok=True)
    save_json(meta, f"{metafolder}/{col}")
    save_json(fdoc, f"{rootfolder}/{col}/{serial}")

    if thread is None or not thread.is_alive():
        print("starting thread")
        thread = threading.Thread(target=sync, args=())
        thread.start()
    return parse(fdoc)

def _doc_matches_filters(doc, filters):
    """Return True if any filter value is a case-insensitive substring of the
    document's corresponding field (`filters` values are already lowercase)."""
    return any(
        # str() so numeric / nested field values no longer raise on .lower().
        needle in str(doc.get(field, '')).lower()
        for field, needle in filters.items()
    )


@app.route('/<action>', defaults={'col': '', 'dockey': ''}, methods=['GET', 'POST'])
@app.route('/<action>/<col>', defaults={'dockey': ''}, methods=['GET', 'POST'])
@app.route('/<action>/<col>/<dockey>', methods=['GET', 'POST'])
def action_col_dockey(action="", col="", dockey=""):
    """Single entry point dispatching on the first URL segment.

    Actions:
        upfile    -> upload page / upload handling (`r_upfile`)
        s         -> serve an uploaded file (`s_upfile`)
        delcol    -> delete collection `col` (data directory and metadata)
        get       -> return document `dockey` of `col`
        post      -> create/update a document from the JSON request body
        listcols  -> list collections
        listdocs  -> list the document keys of `col`
        listfull  -> list the documents of `col`, optionally narrowed by the
                     `filters` query parameter (JSON object, field -> text)

    With the query parameter `rt=img` the result's `value` field is decoded
    as a base64 data URL and returned as a PNG response; otherwise the result
    is returned as JSON. Unknown actions yield an empty JSON object.
    """
    params = request.args.to_dict()

    if action == "upfile":
        return r_upfile(col, dockey)
    if action == "s":
        return s_upfile(col, dockey)

    print("params", params)

    ret = {}

    if action == "delcol":
        # An empty name would resolve to the storage roots themselves and
        # rmtree() the whole data folder; names containing "." are refused
        # as before.
        if not col or "." in col:
            return "ok"
        datapath = f"{rootfolder}/{col}"
        metapath = f"{metafolder}/{col}"
        if os.path.exists(datapath):
            shutil.rmtree(datapath)
        if os.path.exists(metapath):
            os.remove(metapath)
        ret = "del ok"
    elif action == "get":
        ret = getdoc(col, dockey, params)
    elif action == "post":
        # Read the body only where it is needed: evaluating `request.json`
        # for every request rejects non-JSON requests on Flask versions that
        # answer 415 for them. silent=True yields None instead of raising.
        doc = request.get_json(silent=True)
        if doc is None:
            abort(400)
        ret = updatedoc(col, doc, params)
    elif action == "listcols":
        ret = listcols()
    elif action == "listdocs":
        ret = listdocs(col)
    elif action == "listfull":
        ret = [getdoc(col, key, params) for key in listdocs(col)]

        pfilters = json.loads(params.get('filters', '{}'))
        if pfilters:
            pfilters = {k: str(v).lower() for k, v in pfilters.items()}
            ret = [r for r in ret if _doc_matches_filters(r, pfilters)]

    if params.get('rt') == 'img':
        value = ret.get('value') if isinstance(ret, dict) else None
        # Without a usable data URL there is no image to send: answer 404
        # instead of failing with KeyError/IndexError.
        if not isinstance(value, str) or "base64," not in value:
            abort(404)
        val = base64.b64decode(value.split("base64,")[1])
        return send_file(io.BytesIO(val), mimetype="image/png")
    return jsonify(ret)

# Upload storage roots. `fsdatapath` receives the uploaded files;
# `fsmetadatapath` is created here as well.
fsmetadatapath = "./fsmetadata"
fsdatapath = "./fsdata"
for folder in [fsmetadatapath, fsdatapath]:
    # exist_ok avoids the exists()/mkdir() race when several workers import
    # this module concurrently.
    os.makedirs(folder, exist_ok=True)

def gen_serial(filename):
    """Return an unused random storage name that keeps `filename`'s extension.

    The name is `<uuid4 hex>.<ext>`, or just `<uuid4 hex>` when the upload has
    no usable extension (previously such names ended in a dangling ".").

    Args:
        filename: Client-supplied file name; only the text after its last "."
            is used.

    Returns:
        A file name that does not currently exist under `fsdatapath`.
    """
    ext = filename.split(".")[-1] if "." in filename else ""
    # The extension is client-controlled and becomes part of a path on disk,
    # so keep only ASCII letters and digits (no separators, spaces or shell
    # metacharacters).
    ext = re.sub(r"[^A-Za-z0-9]", "", ext)
    suffix = f".{ext}" if ext else ""

    while True:
        candidate = f"{uuid.uuid4().hex}{suffix}"
        if not os.path.exists(os.path.join(fsdatapath, candidate)):
            return candidate


def s_upfile(path1="", path2=""):
    """Serve a previously uploaded file.

    Args:
        path1: Storage name of the file (a key of the vault's `data` mapping).
        path2: Unused; the route carries the original file name here.

    Returns:
        A Flask response streaming the file inline. When numbered chunk files
        (`<name>00`, `<name>01`, ...) exist they are concatenated in memory;
        otherwise the single stored file is sent.

    Raises:
        werkzeug NotFound (HTTP 404): if `path1` is empty or not registered
            in the vault. The function used to fall through and return None,
            which Flask reports as a server error.
    """
    db = getdoc("upfile", "vault")
    print("db", db)
    entries = db['data'] if db else {}
    if path1 == "" or path1 not in entries:
        abort(404)

    filename = entries[path1]['filename']
    filepath = os.path.join(fsdatapath, path1)
    # Markdown is served as plain text; for everything else the MIME type is
    # left for send_file to determine.
    mimetype = "text/plain" if filepath.endswith(".md") else None

    if not os.path.exists(filepath + "00"):
        return send_file(filepath,
                         attachment_filename=filename,
                         as_attachment=False,
                         conditional=True,
                         mimetype=mimetype,
                         )

    # Reassemble the chunks in order.
    # NOTE(review): this assumes two-digit numeric suffixes counting up from
    # 00 - confirm what `split -d` produces once more than 100 chunks exist.
    buf = io.BytesIO()
    i = 0
    chunkpath = f"{filepath}{i:02}"
    while os.path.exists(chunkpath):
        # Close each chunk as soon as it is copied (handles used to leak).
        with open(chunkpath, 'rb') as chunk:
            shutil.copyfileobj(chunk, buf)
        i += 1
        chunkpath = f"{filepath}{i:02}"

    buf.seek(0)
    return send_file(buf,
                     attachment_filename=filename,
                     as_attachment=False,
                     conditional=True,
                     mimetype=mimetype,
                     )

def r_upfile(path1="", path2=""):
    """Render the upload page (GET) or store uploaded files (POST).

    GET (or `path1 == "root"`): returns `upfile_template.html` with the
    `FILELIST` placeholder replaced by one link per stored file; any other
    `path1` on GET returns an empty string.

    POST: every file in the `file[]` form field is saved under a generated
    name, split into 5 MiB chunks with the external `split` tool, and
    registered in the `vault` document of the `upfile` collection. The upload
    page is returned afterwards.

    Args:
        path1: "" or "root" to request the upload page.
        path2: Unused.

    Raises:
        subprocess.CalledProcessError: if `split` fails; the uploaded file is
            then left in place instead of being deleted.
    """
    from html import escape
    from urllib.parse import quote

    print("r_upfile", request.method)
    db = getdoc("upfile", "vault")
    print("db", db)
    if not db:
        db = {
            "key": "vault",
            "data": {}
        }
    entries = db.setdefault('data', {})

    if request.method == 'GET' or path1 == "root":
        if path1 not in ("", "root"):
            return ""
        with open("upfile_template.html") as tpl:
            page = tpl.read()
        print(db)
        links = []
        for serial, doc in entries.items():
            filename = str(doc['filename'])
            # File names come from clients: percent-encode them for the URL
            # and HTML-escape them for the link text so they cannot inject
            # markup into the page.
            href = f"/s/{quote(serial, safe='')}/{quote(filename, safe='')}"
            links.append(f"""
                <a target='_blank' href='{href}'>{escape(filename)}</a><br>
                """)
        return page.replace("FILELIST", "".join(links))

    if request.method == 'POST':
        for f in request.files.getlist('file[]'):
            filename = gen_serial(f.filename)
            print(filename, f.filename)
            filepath = os.path.join(fsdatapath, filename)
            f.save(filepath)
            m = mime.from_file(filepath)
            size = os.path.getsize(filepath)
            print("size", size, size > 5*(1024**2))
            # An empty upload is kept as one file: there is nothing to split,
            # and deleting it would leave nothing to serve.
            if size > 0:
                # Argument list instead of a shell string: the path contains
                # client-influenced text and must never reach a shell.
                # check=True stops before the original is removed if the
                # split did not succeed.
                subprocess.run(
                    ["split", "-b", "5M", "-d", filepath, filepath],
                    check=True,
                )
                os.remove(filepath)
            entries[filename] = {
                "filename": f.filename,
                "mime": m,
            }
            updatedoc("upfile", db)
        return r_upfile("root")

    return ""


# gunicorn --workers=2 'app:create_app()' --bind=0.0.0.0:<port>
def create_app():
    """Application factory for WSGI servers: return the module-level `app`."""
    return app

if __name__ == '__main__':
    # Direct execution: run Flask's built-in server on all interfaces,
    # port 9000, with request threading switched off.
    app.run(host='0.0.0.0', port=9000, threaded=False)
    #app.run(host='0.0.0.0', port=9000)
    
    # Manual smoke test, kept for reference:
    #with app.test_client() as c:
    #    rs = c.get("/")
    #    print(rs.data)