Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# (c) 2018-2020
#     MPIB <https://www.mpib-berlin.mpg.de/>,
#     MPI-CBS <https://www.cbs.mpg.de/>,
#     MPIP <http://www.psych.mpg.de/>
#
# This file is part of Castellum.
#
# Castellum is free software; you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Castellum is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public
# License along with Castellum. If not, see
# <http://www.gnu.org/licenses/>.
22import functools
24from django.conf import settings
25from django.contrib.gis.geos import Point
26from django.core.management.base import BaseCommand
27from django.core.management.base import CommandError
29import phonenumbers
30from faker import Faker
32from castellum.contacts.models import Address
33from castellum.contacts.models import Contact
34from castellum.contacts.models import phonetic
35from castellum.recruitment.models import AttributeChoice
36from castellum.recruitment.models import AttributeSet
37from castellum.studies.models import Study
38from castellum.studies.models import StudySession
39from castellum.subjects.models import Consent
40from castellum.subjects.models import ConsentDocument
41from castellum.subjects.models import Subject
# Module-wide Faker instance configured for the 'de_DE' locale; every
# fake_* helper in this file draws its random data from it.
fake = Faker('de_DE')
@functools.lru_cache(maxsize=1)
def get_highest_degree_choices():
    """Return the primary keys of all choices of attribute description 4.

    The result is a flat ``values_list`` queryset. Because of ``lru_cache``
    the same queryset object is handed back on every later call.

    NOTE(review): presumably description 4 is the "highest degree"
    attribute (going by this function's name) -- confirm against the
    attribute fixtures.
    """
    choices = AttributeChoice.objects.filter(description__pk=4)
    return choices.values_list('pk', flat=True)
def fake_date_of_birth():
    """Return a random date of birth following a fixed age distribution.

    One random number selects an age bracket; the date is then drawn
    uniformly (via ``fake.date_between``) from that bracket.
    """
    # (start age, share of all subjects). The last bracket has no explicit
    # upper bound in the table: it is used whenever none of the earlier
    # brackets matched and is capped at ``oldest_age`` years.
    brackets = [
        (14, 0.06),
        (20, 0.14),
        (30, 0.14),
        (40, 0.16),
        (50, 0.19),
        (60, 0.14),
        (70, 0.16),
    ]
    oldest_age = 100

    draw = fake.random.random()
    cumulative = 0

    # Pair every bracket with its successor; the successor's start age is
    # the upper age limit of the current bracket.
    for (youngest, share), (next_youngest, _) in zip(brackets, brackets[1:]):
        cumulative += share

        if draw < cumulative:
            return fake.date_between(
                start_date='-{}y'.format(next_youngest),
                end_date='-{}y'.format(youngest),
            )

    return fake.date_between(
        start_date='-{}y'.format(oldest_age),
        end_date='-{}y'.format(brackets[-1][0]),
    )
def fake_phone_number():
    """Return a fake phone number that ``phonenumbers`` accepts as valid.

    Candidates are generated until one both parses for
    ``settings.PHONENUMBER_DEFAULT_REGION`` and passes
    ``phonenumbers.is_valid_number``. The raw string produced by Faker is
    returned, not a re-formatted version of it.
    """
    # Retry in a loop rather than by recursion: a long run of rejected
    # candidates must not be able to exhaust the interpreter's call stack.
    while True:
        number = fake.phone_number()
        try:
            parsed = phonenumbers.parse(number, settings.PHONENUMBER_DEFAULT_REGION)
        except phonenumbers.NumberParseException:
            # Unparseable candidate: just try again.
            continue
        if phonenumbers.is_valid_number(parsed):
            return number
def fake_language():
    """Return a random language code following a fixed distribution."""
    # (language code, share of all subjects)
    weighted_languages = (
        ('de', 0.70),
        ('en', 0.10),
        ('tr', 0.06),
        ('fr', 0.06),
        ('es', 0.06),
        ('it', 0.02),
    )

    draw = fake.random.random()
    threshold = 0

    for code, share in weighted_languages:
        threshold += share

        if draw < threshold:
            return code

    # Only reached if the draw is not below the accumulated total (e.g.
    # because of floating point rounding); fall back to the last language.
    return weighted_languages[-1][0]
def fake_subject():
    """Build an unsaved ``Subject`` with a random privacy level in 0..2."""
    level = fake.random.randint(0, 2)
    return Subject(privacy_level=level)
def fake_studysession(study):
    """Build an unsaved ``StudySession`` for ``study``.

    The session gets a random word as name and a random duration taken
    from ``randrange(10, 120, 10)``, i.e. a multiple of 10 from 10 to 110.
    """
    session_name = fake.word()
    session_duration = fake.random.randrange(10, 120, 10)
    return StudySession(study=study, name=session_name, duration=session_duration)
def fake_study():
    """Build an unsaved ``Study`` filled with random demo values.

    The session period starts at a random date within the past 30 days and
    ends at a random date within the coming 30 days.
    """
    # The dict literal is evaluated top to bottom, which fixes the order
    # in which random values are drawn from ``fake``.
    fields = {
        'description': fake.paragraph(nb_sentences=5, variable_nb_sentences=True),
        'recruitment_text': fake.paragraph(nb_sentences=5, variable_nb_sentences=True),
        'name': fake.word().upper(),
        'contact_person': fake.name(),
        'principal_investigator': fake.name(),
        'min_subject_count': fake.random.randint(0, 50),
        'sessions_start': fake.past_date(start_date="-30d"),
        'sessions_end': fake.future_date(end_date="+30d"),
    }
    return Study(**fields)
def fake_address(contact):
    """Build an unsaved ``Address`` for ``contact`` with random values.

    The country is always 'Deutschland'; the house number is a random
    integer between 1 and 200.
    """
    city = fake.city()
    zip_code = fake.postcode()
    street = fake.street_name()
    house_number = fake.random.randint(1, 200)

    return Address(
        contact=contact,
        country='Deutschland',
        city=city,
        zip_code=zip_code,
        street=street,
        house_number=house_number,
    )
def fake_contact(subject):
    """Build an unsaved ``Contact`` that belongs to ``subject``.

    Gender is 'f' or 'm' with equal probability and decides which Faker
    name generators are used. An email address is set with probability
    0.67 and a phone number with probability 0.75.
    """
    if fake.random.random() < 0.5:
        gender = 'f'
        pick_first_name = fake.first_name_female
        pick_last_name = fake.last_name_female
    else:
        gender = 'm'
        pick_first_name = fake.first_name_male
        pick_last_name = fake.last_name_male

    first_name = pick_first_name()
    last_name = pick_last_name()

    contact = Contact(
        gender=gender,
        first_name=first_name,
        first_name_phonetic=phonetic(first_name),
        last_name=last_name,
        last_name_phonetic=phonetic(last_name),
        date_of_birth=fake_date_of_birth(),
        subject_id=subject.pk,
    )

    has_email = fake.random.random() < 0.67
    if has_email:
        contact.email = fake.email()

    has_phone = fake.random.random() < 0.75
    if has_phone:
        contact.phone_number = fake_phone_number()

    return contact
def fake_attributeset(contact):
    """Build an unsaved ``AttributeSet`` for the subject of ``contact``.

    Each of the keys 'd1' to 'd4' is filled in with probability 0.9 and is
    otherwise left out of ``data``.
    """
    # (data key, value producer), processed in this order. A producer is
    # only called when its key is actually going to be filled.
    producers = (
        ('d1', lambda: 2 if fake.random.random() < 0.15 else 1),
        ('d2', fake_language),
        ('d3', lambda: contact.date_of_birth),
        ('d4', lambda: fake.random.choice(get_highest_degree_choices())),
    )

    data = {}
    for key, produce in producers:
        if fake.random.random() < 0.9:
            data[key] = produce()

    return AttributeSet(subject_id=contact.subject_id, data=data)
def generate_geolocations(contacts):
    """Bulk-create one ``Geolocation`` with a random point per contact.

    The first coordinate is drawn from [13.2378, 13.6224] and the second
    from [52.4085, 52.5753].
    """
    # Imported here rather than at module level; the caller only invokes
    # this function when 'castellum.geofilters' is an installed app.
    from castellum.geofilters.models import Geolocation

    def random_point():
        first = fake.random.uniform(13.2378, 13.6224)
        second = fake.random.uniform(52.4085, 52.5753)
        return Point(first, second)

    geolocations = (
        Geolocation(contact=contact, point=random_point())
        for contact in contacts
    )
    Geolocation.objects.bulk_create(geolocations)
def generate_studies(count):
    """Create ``count`` random studies, each with a single study session."""
    print('Generating studies')

    # Build every study first, then persist them one by one.
    studies = []
    for _ in range(count):
        studies.append(fake_study())

    for study in studies:
        study.save()

    sessions = (fake_studysession(study) for study in studies)
    StudySession.objects.bulk_create(sessions)
def generate_subjects(count):
    """Create ``count`` demo subjects together with their related records.

    For every subject a contact, an address, an attribute set and a
    confirmed consent are created; geolocations are added when the
    ``castellum.geofilters`` app is installed. The last subject always
    receives a fixed, well-known test contact instead of a random one.

    Does nothing when ``count`` is smaller than 1.
    """
    if count < 1:
        # The fixed test contact needs at least one subject to attach to.
        return

    # NOTE: We have ForeignKeys to Address and Subject. Unfortunately, bulk_create does not set
    # the id on the instances, so we have to get them from the database after bulk_create. This
    # assumes that the database was empty before.

    print('Generating subjects')
    Subject.objects.bulk_create([fake_subject() for _ in range(count)], batch_size=200)
    # Evaluate the slice exactly once. Indexing an unevaluated queryset
    # would run a separate LIMIT/OFFSET query for every single index.
    subjects = list(Subject.objects.all()[:count])

    print('Generating contacts')
    contacts = [fake_contact(subject) for subject in subjects[:-1]]
    testcontact = Contact(
        gender='*',
        first_name='Muhammad',
        first_name_phonetic=phonetic('Muhammad'),
        last_name='Nguyen',
        last_name_phonetic=phonetic('Nguyen'),
        date_of_birth='2000-12-13',
        email='muhammad@example.com',
        subject_id=subjects[-1].pk,
    )
    contacts.append(testcontact)

    Contact.objects.bulk_create(contacts)
    # Re-read the contacts so that they carry primary keys (see NOTE above).
    contacts = list(Contact.objects.all()[:count])

    print('Generating addresses')
    Address.objects.bulk_create(fake_address(contact) for contact in contacts)

    if 'castellum.geofilters' in settings.INSTALLED_APPS:
        print('Generating geolocations')
        generate_geolocations(contacts)

    print('Generating attributesets')
    attributesets = [fake_attributeset(contact) for contact in contacts]
    AttributeSet.objects.bulk_create(attributesets)

    print('Generating consents')
    try:
        document = ConsentDocument.objects.filter(is_valid=True).latest()
    except ConsentDocument.DoesNotExist:
        # No valid consent document yet: create one to attach consents to.
        document = ConsentDocument.objects.create()
    consents = [Consent(subject=s, document=document, status=Consent.CONFIRMED) for s in subjects]
    Consent.objects.bulk_create(consents)
class Command(BaseCommand):
    """Management command that fills the database with generated demo data.

    Studies are only generated when no study exists yet, and subjects only
    when no subject exists yet. The command refuses to run when
    ``settings.PRODUCTION`` is set.
    """

    help = 'Populate database with demo data.'

    def add_arguments(self, parser):
        """Register the integer options that set how many records to create."""
        count_options = (
            ('--study-count', 5),
            ('--subject-count', 2000),
        )
        for flag, default in count_options:
            parser.add_argument(flag, default=default, type=int)

    def handle(self, *args, **options):
        """Generate the demo data, skipping record types that already exist.

        Raises:
            CommandError: if ``settings.PRODUCTION`` is truthy.
        """
        if settings.PRODUCTION:
            raise CommandError('Demo content not created in production environment.')

        studies_missing = not Study.objects.exists()
        if studies_missing:
            generate_studies(options['study_count'])

        subjects_missing = not Subject.objects.exists()
        if subjects_missing:
            generate_subjects(options['subject_count'])