Commit cb2203997643e591fad354adba6e3b12a074c1c8

Authored by Miguel Barão
1 parent dd4d2655
Exists in master and in 1 other branch dev

- much faster (6.5x) initdb by using threads for bcrypt.

Showing 2 changed files with 91 additions and 55 deletions   Show diff stats
BUGS.md
... ... @@ -12,11 +12,12 @@
12 12  
13 13 # TODO
14 14  
15   -- radio e checkboxes, aceitar numeros como seleccao das opcoes.
16   -- each topic only loads a sample of K questions (max) in random order.
  15 +- adicionar codigo para radio e checkboxes onde em vez de se dar uma lista de opcoes, dão-se 2 listas uma de opcoes correctas e outra de erradas.
17 16 - servir imagens/ficheiros.
  17 +- each topic only loads a sample of K questions (max) in random order.
  18 +- radio e checkboxes, aceitar numeros como seleccao das opcoes.
  19 +- reload das perguntas enquanto online. ver signal em http://stackabuse.com/python-async-await-tutorial/
18 20 - perguntas tipo tristate: (sim, não, não sei)
19   -- reload das perguntas enquanto online.
20 21 - tabela de progresso de todos os alunos por topico.
21 22 - tabela com perguntas / quantidade de respostas certas/erradas.
22 23 - tabela com topicos / quantidade de estrelas.
... ... @@ -30,6 +31,7 @@
30 31  
31 32 # FIXED
32 33  
  34 +- async/threadpool no bcrypt do initdb.
33 35 - numero de estrelas depende da proporcao entre certas e erradas.
34 36 - image brand da universidade está esbatida.
35 37 - reportar comentarios após submeter.
... ...
initdb.py
1 1 #!/usr/bin/env python3
2 2  
  3 +# base
3 4 import csv
4 5 import argparse
5 6 import re
6 7 import string
7 8 from sys import exit
  9 +from concurrent.futures import ThreadPoolExecutor
  10 +from multiprocessing import cpu_count
  11 +import asyncio
8 12  
  13 +# installed packages
9 14 import bcrypt
10   -from sqlalchemy import create_engine
11   -from sqlalchemy.orm import sessionmaker
  15 +import sqlalchemy as sa
12 16  
  17 +# this project
13 18 from models import Base, Student
14 19  
  20 +pool = ThreadPoolExecutor() #cpu_count()
  21 +
  22 +# replace password by hash for a single student dict
def hashpw(student):
    """Replace the password of a single student dict by its bcrypt hash.

    The plain-text password is read from ``student['pw']``; when that key is
    absent, ``student['uid']`` is used as the password instead.  The dict is
    modified in place (``student['pw']`` receives the hash) and nothing is
    returned.  A dot is printed, without newline, as a progress indicator.
    """
    plaintext = student.get('pw', student['uid'])
    encoded = plaintext.encode('utf-8')  # bcrypt is given bytes, not str
    print('.', end='', flush=True)
    salt = bcrypt.gensalt()
    student['pw'] = bcrypt.hashpw(encoded, salt)
  28 +
  29 +
async def hash_all_passwords(executor, students):
    """Hash the passwords of all students concurrently.

    Schedules one ``hashpw()`` call per student on ``executor`` and waits
    until all of them have finished.  Each student dict is updated in place
    by ``hashpw()``; nothing is returned.

    Args:
        executor: concurrent.futures executor on which ``hashpw()`` runs.
        students: iterable of student dicts, as accepted by ``hashpw()``.

    Raises:
        Any exception raised by ``hashpw()`` in a worker is re-raised here
        instead of being silently dropped.
    """
    loop = asyncio.get_event_loop()
    tasks = [loop.run_in_executor(executor, hashpw, s) for s in students]
    if tasks:  # nothing to wait for when there are no students
        # gather() propagates worker exceptions, whereas asyncio.wait()
        # leaves them unretrieved and rejects an empty collection.
        await asyncio.gather(*tasks)
    print()  # terminate the line of progress dots printed by hashpw()
  35 +
15 36 # SIIUE names have alien strings like "(TE)" and are sometimes capitalized
16 37 # We remove them so that students dont keep asking what it means
17 38 def fix(name):
... ... @@ -19,74 +40,83 @@ def fix(name):
19 40  
20 41 # ===========================================================================
21 42 # Parse command line options
22   -argparser = argparse.ArgumentParser(
23   - description='Create new database from a CSV file (SIIUE format)')
def parse_commandline_arguments(argv=None):
    """Parse the command line options of this script.

    Args:
        argv: optional list of argument strings.  When None (the default),
            argparse reads the arguments from ``sys.argv``.

    Returns:
        argparse.Namespace with the attributes ``db`` (database filename,
        default ``'students.db'``), ``demo`` (bool flag) and ``csvfile``
        (CSV filename, ``''`` when not given).
    """
    argparser = argparse.ArgumentParser(
        description='Create new database from a CSV file (SIIUE format)')

    argparser.add_argument('--db',
                           default='students.db',
                           type=str,
                           help='database filename')

    argparser.add_argument('--demo',
                           action='store_true',
                           help='initialize database with a few fake students')

    # FIXME: the '--pw' option (default='', type=str, help='default password')
    # is currently disabled.

    argparser.add_argument('csvfile',
                           nargs='?',
                           type=str,
                           default='',
                           help='CSV filename')

    return argparser.parse_args(argv)
46 69  
47   -# =======================================================x====================
48   -engine = create_engine(f'sqlite:///{args.db}', echo=False)
49   -Base.metadata.create_all(engine) # Criate schema if needed
50   -Session = sessionmaker(bind=engine)
51   -
52   -# add administrator
53   -students = {'0': 'Professor'}
54   -
55   -if args.csvfile:
56   - # add students from csv file if available
  70 +# ===========================================================================
def get_students_from_csv(filename):
    """Read the list of students from a CSV file.

    The file is read as ISO-8859-1 text with ';' as delimiter.  The columns
    'N.º' and 'Nome' of every row are used; names are cleaned with fix().

    Args:
        filename: path of the CSV file.

    Returns:
        List of dicts, one per row, with the keys 'uid' and 'name'.

    Exits the program with status 1 if the file cannot be opened.
    """
    try:
        # newline='' is what the csv module documents for files given to it
        csvfile = open(filename, encoding='iso-8859-1', newline='')
    except EnvironmentError:
        print(f'Error: CSV file "{filename}" not found.')
        exit(1)

    # The reader is lazy, so all rows are consumed while the file is still
    # open; the with-block then closes the file handle.
    with csvfile:
        csvreader = csv.DictReader(csvfile,
                                   delimiter=';',
                                   quotechar='"',
                                   skipinitialspace=True)
        students = [{
            'uid': s['N.º'],
            'name': fix(s['Nome'])
            } for s in csvreader]

    return students
63 83  
  84 +# ===========================================================================
args = parse_commandline_arguments()

# Build the list of student dicts ({'uid': ..., 'name': ...}).
if args.csvfile:
    students = get_students_from_csv(args.csvfile)
elif args.demo:
    students = [
        {'uid': '1915', 'name': 'Alan Turing'},
        {'uid': '1938', 'name': 'Donald Knuth'},
        {'uid': '1815', 'name': 'Ada Lovelace'},
        {'uid': '1969', 'name': 'Linus Torvalds'},
        {'uid': '1955', 'name': 'Tim Burners-Lee'},
        {'uid': '1916', 'name': 'Claude Shannon'},
        {'uid': '1903', 'name': 'John von Neumann'}]
else:
    # Neither a CSV file nor --demo was given: start from an empty list so
    # that the database is created with the administrator only.
    students = []
students.append({'uid': '0', 'name': 'Admin'})

print(f'Generating {len(students)} bcrypt password hashes.')
# The with-block shuts the worker threads down once hashing has finished.
with ThreadPoolExecutor(cpu_count()) as executor:
    # A dedicated loop avoids the deprecated implicit loop creation of
    # asyncio.get_event_loop() when no loop is running.
    event_loop = asyncio.new_event_loop()
    try:
        event_loop.run_until_complete(hash_all_passwords(executor, students))
    finally:
        event_loop.close()

print(f'Creating database: {args.db}')
engine = sa.create_engine(f'sqlite:///{args.db}', echo=False)
Base.metadata.create_all(engine)  # Create schema if needed
Session = sa.orm.sessionmaker(bind=engine)
77 110  
78 111 try:
79 112 # --- start db session ---
80 113 session = Session()
81 114  
82   - for num, name in students.items():
83   - print('.', end='', flush=True)
84   - pw = (args.pw or num).encode('utf-8')
85   - session.add(Student(id=num, name=name, password=bcrypt.hashpw(pw, bcrypt.gensalt())))
86   - print()
  115 + session.add_all([Student(id=s['uid'], name=s['name'], password=s['pw'])
  116 + for s in students])
87 117  
88 118 n = session.query(Student).count()
89   - print(f'New database created: {args.db}\n{n} user(s) inserted:')
  119 + print(f'{n} user(s):')
90 120  
91 121 users = session.query(Student).order_by(Student.id).all()
92 122 print(f' {users[0].id:8} - {users[0].name} (administrator)')
... ... @@ -97,6 +127,10 @@ try:
97 127 if n > 2:
98 128 print(f' {users[-1].id:8} - {users[-1].name}')
99 129  
  130 +except sa.exc.IntegrityError:
  131 + print('!!! Integrity error !!!')
  132 + session.rollback()
  133 +
100 134 except Exception as e:
101 135 print(f'Error: Database "{args.db}" already exists?')
102 136 session.rollback()
... ...