Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# (c) 2018-2020 

2# MPIB <https://www.mpib-berlin.mpg.de/>, 

3# MPI-CBS <https://www.cbs.mpg.de/>, 

4# MPIP <http://www.psych.mpg.de/> 

5# 

6# This file is part of Castellum. 

7# 

8# Castellum is free software; you can redistribute it and/or modify it 

9# under the terms of the GNU Affero General Public License as published 

10# by the Free Software Foundation; either version 3 of the License, or 

11# (at your option) any later version. 

12# 

13# Castellum is distributed in the hope that it will be useful, but 

14# WITHOUT ANY WARRANTY; without even the implied warranty of 

15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 

16# Affero General Public License for more details. 

17# 

18# You should have received a copy of the GNU Affero General Public 

19# License along with Castellum. If not, see 

20# <http://www.gnu.org/licenses/>. 

21 

22import functools 

23 

24from django.conf import settings 

25from django.contrib.gis.geos import Point 

26from django.core.management.base import BaseCommand 

27from django.core.management.base import CommandError 

28 

29import phonenumbers 

30from faker import Faker 

31 

32from castellum.contacts.models import Address 

33from castellum.contacts.models import Contact 

34from castellum.contacts.models import phonetic 

35from castellum.recruitment.models import AttributeChoice 

36from castellum.recruitment.models import AttributeSet 

37from castellum.studies.models import Study 

38from castellum.studies.models import StudySession 

39from castellum.subjects.models import Consent 

40from castellum.subjects.models import ConsentDocument 

41from castellum.subjects.models import Subject 

42 

# Shared Faker instance configured with the German ('de_DE') locale. The
# helper functions in this module draw both their fake values and their
# random numbers (via ``fake.random``) from this single instance.
fake = Faker('de_DE')

44 

45 

@functools.lru_cache(maxsize=1)
def get_highest_degree_choices():
    """Return the primary keys of all choices of attribute description 4.

    The returned object is a flat ``values_list`` queryset. Because of the
    ``lru_cache`` decorator the same queryset object is handed out on every
    call after the first one.

    NOTE(review): presumably description pk 4 is the "highest degree"
    attribute, as the function name suggests -- confirm against the fixtures.
    """
    choices = AttributeChoice.objects.filter(description__pk=4)
    return choices.values_list('pk', flat=True)

49 

50 

def fake_date_of_birth():
    """Return a fake date of birth following a weighted age distribution.

    One random number selects an age group by cumulative weight; the date is
    then drawn between the start age of that group and the start age of the
    next one. Whatever probability is not claimed by the earlier groups falls
    to the last group, which reaches from its start age up to 100 years.
    """
    age_groups = [
        # (start age, percentage)
        (14, 0.06),
        (20, 0.14),
        (30, 0.14),
        (40, 0.16),
        (50, 0.19),
        (60, 0.14),
        (70, 0.16),
    ]
    # Turns an age in years into a relative date offset such as '-14y'.
    years_ago = '-{}y'.format

    roll = fake.random.random()
    cumulative = 0

    # Pair every group with its successor: the successor's start age is the
    # upper age bound (i.e. the earliest birth date) of the current group.
    for (min_age, share), (max_age, _) in zip(age_groups, age_groups[1:]):
        cumulative += share

        if roll < cumulative:
            return fake.date_between(
                start_date=years_ago(max_age),
                end_date=years_ago(min_age),
            )

    # No earlier group matched: use the last group, capped at 100 years.
    oldest_group_start = age_groups[-1][0]
    return fake.date_between(
        start_date=years_ago(100),
        end_date=years_ago(oldest_group_start),
    )

77 

78 

def fake_phone_number():
    """Return a fake phone number that the ``phonenumbers`` library accepts.

    Candidates are generated until one both parses for
    ``settings.PHONENUMBER_DEFAULT_REGION`` and passes
    ``phonenumbers.is_valid_number``; the unmodified candidate string is
    returned.
    """
    while True:
        candidate = fake.phone_number()
        try:
            parsed = phonenumbers.parse(candidate, settings.PHONENUMBER_DEFAULT_REGION)
            if phonenumbers.is_valid_number(parsed):
                return candidate
        except phonenumbers.NumberParseException:
            # Unparseable candidate: just try again with a fresh one.
            continue

89 

90 

def fake_language():
    """Return a language code picked according to fixed weights.

    A single random number is compared against the running total of the
    weights; the first language whose running total exceeds it wins. If none
    does, the last language in the table is returned.
    """
    groups = [
        # (language, percentage)
        ('de', 0.70),
        ('en', 0.10),
        ('tr', 0.06),
        ('fr', 0.06),
        ('es', 0.06),
        ('it', 0.02),
    ]

    roll = fake.random.random()

    # Running totals of the weights, one upper bound per language.
    upper_bounds = []
    running_total = 0
    for _, share in groups:
        running_total += share
        upper_bounds.append(running_total)

    fallback = groups[-1][0]
    return next(
        (lang for (lang, _), bound in zip(groups, upper_bounds) if roll < bound),
        fallback,
    )

112 

113 

def fake_subject():
    """Return an unsaved ``Subject`` with a random privacy level of 0, 1 or 2."""
    privacy_level = fake.random.randint(0, 2)
    return Subject(privacy_level=privacy_level)

116 

117 

def fake_studysession(study):
    """Return an unsaved ``StudySession`` for ``study`` with a fake name and duration."""
    session_name = fake.word()
    # A multiple of 10 from 10 up to 110 (the stop value 120 is exclusive).
    duration = fake.random.randrange(10, 120, 10)
    return StudySession(study=study, name=session_name, duration=duration)

120 

121 

def fake_study():
    """Return an unsaved ``Study`` populated with fake data.

    The session window starts at a past date within the last 30 days and ends
    at a future date within the next 30 days.
    """
    def fake_text():
        return fake.paragraph(nb_sentences=5, variable_nb_sentences=True)

    # The values are drawn in a fixed order so that a seeded generator keeps
    # producing the same study.
    description = fake_text()
    recruitment_text = fake_text()
    name = fake.word().upper()
    contact_person = fake.name()
    principal_investigator = fake.name()
    min_subject_count = fake.random.randint(0, 50)
    sessions_start = fake.past_date(start_date="-30d")
    sessions_end = fake.future_date(end_date="+30d")

    return Study(
        name=name,
        contact_person=contact_person,
        principal_investigator=principal_investigator,
        min_subject_count=min_subject_count,
        description=description,
        recruitment_text=recruitment_text,
        sessions_start=sessions_start,
        sessions_end=sessions_end,
    )

136 

137 

def fake_address(contact):
    """Return an unsaved ``Address`` for ``contact`` with fake location data.

    The country is always 'Deutschland'; the house number is a random integer
    from 1 to 200.
    """
    city = fake.city()
    zip_code = fake.postcode()
    street = fake.street_name()
    house_number = fake.random.randint(1, 200)

    return Address(
        contact=contact,
        country='Deutschland',
        city=city,
        zip_code=zip_code,
        street=street,
        house_number=house_number,
    )

147 

148 

def fake_contact(subject):
    """Return an unsaved ``Contact`` linked to ``subject`` via ``subject.pk``.

    Gender is 'f' or 'm' with equal probability and decides which name
    generators are used. An email address is set with probability 0.67 and,
    independently, a phone number with probability 0.75.
    """
    is_female = fake.random.random() < 0.5
    if is_female:
        gender = 'f'
        pick_first_name = fake.first_name_female
        pick_last_name = fake.last_name_female
    else:
        gender = 'm'
        pick_first_name = fake.first_name_male
        pick_last_name = fake.last_name_male

    first_name = pick_first_name()
    last_name = pick_last_name()

    contact = Contact(
        gender=gender,
        first_name=first_name,
        first_name_phonetic=phonetic(first_name),
        last_name=last_name,
        last_name_phonetic=phonetic(last_name),
        date_of_birth=fake_date_of_birth(),
        subject_id=subject.pk,
    )

    has_email = fake.random.random() < 0.67
    if has_email:
        contact.email = fake.email()

    has_phone = fake.random.random() < 0.75
    if has_phone:
        contact.phone_number = fake_phone_number()

    return contact

175 

176 

def fake_attributeset(contact):
    """Return an unsaved ``AttributeSet`` for the subject of ``contact``.

    Each of the keys 'd1' to 'd4' is filled with probability 0.9:
    'd1' is 2 in 15% of the cases and 1 otherwise, 'd2' is a language code,
    'd3' is the contact's date of birth and 'd4' is a random primary key from
    ``get_highest_degree_choices()``.

    NOTE(review): the 'dN' keys presumably refer to attribute descriptions
    with pk N -- confirm against the attribute fixtures.
    """
    def is_answered():
        return fake.random.random() < 0.9

    data = {}

    if is_answered():
        data['d1'] = 1 if fake.random.random() >= 0.15 else 2

    if is_answered():
        data['d2'] = fake_language()

    if is_answered():
        data['d3'] = contact.date_of_birth

    if is_answered():
        degree_choices = get_highest_degree_choices()
        data['d4'] = fake.random.choice(degree_choices)

    return AttributeSet(subject_id=contact.subject_id, data=data)

188 

189 

def generate_geolocations(contacts):
    """Bulk-create one ``Geolocation`` with a random point for every contact.

    The model is imported inside the function, so ``castellum.geofilters`` is
    only needed when this function is actually called.
    """
    from castellum.geofilters.models import Geolocation

    def random_point():
        # NOTE(review): presumably a longitude/latitude bounding box around
        # Berlin -- confirm.
        x = fake.random.uniform(13.2378, 13.6224)
        y = fake.random.uniform(52.4085, 52.5753)
        return Point(x, y)

    geolocations = [
        Geolocation(contact=contact, point=random_point())
        for contact in contacts
    ]
    Geolocation.objects.bulk_create(geolocations)

200 

201 

def generate_studies(count):
    """Create and save ``count`` fake studies, each with one fake session."""
    print('Generating studies')

    # Build all studies first, then save them one by one so that each has a
    # primary key before the sessions referencing it are bulk-created.
    new_studies = [fake_study() for _ in range(count)]
    for new_study in new_studies:
        new_study.save()

    sessions = [fake_studysession(new_study) for new_study in new_studies]
    StudySession.objects.bulk_create(sessions)

208 

209 

def generate_subjects(count):
    """Create ``count`` demo subjects together with their related records.

    For every subject a contact, an address, an attribute set and a confirmed
    consent are created; geolocations are added when the
    ``castellum.geofilters`` app is installed. The last subject always gets a
    fixed, well-known test contact instead of a random one.

    Nothing is created when ``count`` is smaller than 1.
    """
    # NOTE: We have ForeignKeys to Address and Subject. Unfortunately, bulk_create does not set
    # the id on the instances, so we have to get them from the database after bulk_create. This
    # assumes that the database was empty before.

    if count < 1:
        # The fixed test contact below needs at least one subject to attach to.
        return

    print('Generating subjects')
    Subject.objects.bulk_create([fake_subject() for _ in range(count)], batch_size=200)
    # Evaluate the queryset exactly once. Indexing a lazy, sliced queryset
    # would issue a separate LIMIT/OFFSET query for every single access.
    subjects = list(Subject.objects.all()[:count])

    print('Generating contacts')
    contacts = [fake_contact(subject) for subject in subjects[:-1]]
    testcontact = Contact(
        gender='*',
        first_name='Muhammad',
        first_name_phonetic=phonetic('Muhammad'),
        last_name='Nguyen',
        last_name_phonetic=phonetic('Nguyen'),
        date_of_birth='2000-12-13',
        email='muhammad@example.com',
        subject_id=subjects[-1].pk,
    )
    contacts.append(testcontact)

    Contact.objects.bulk_create(contacts)
    # Re-fetch to obtain instances with primary keys; materialized because the
    # result is iterated several times below.
    contacts = list(Contact.objects.all()[:count])

    print('Generating addresses')
    Address.objects.bulk_create([fake_address(contact) for contact in contacts])

    if 'castellum.geofilters' in settings.INSTALLED_APPS:
        print('Generating geolocations')
        generate_geolocations(contacts)

    print('Generating attributesets')
    attributesets = [fake_attributeset(contact) for contact in contacts]
    AttributeSet.objects.bulk_create(attributesets)

    print('Generating consents')
    try:
        document = ConsentDocument.objects.filter(is_valid=True).latest()
    except ConsentDocument.DoesNotExist:
        # No valid consent document yet: create one with default values.
        document = ConsentDocument.objects.create()
    consents = [Consent(subject=s, document=document, status=Consent.CONFIRMED) for s in subjects]
    Consent.objects.bulk_create(consents)

254 

255 

class Command(BaseCommand):
    """Management command that fills the database with demo studies and subjects."""

    help = 'Populate database with demo data.'

    def add_arguments(self, parser):
        """Register the options that control how many objects are generated."""
        parser.add_argument('--study-count', type=int, default=5)
        parser.add_argument('--subject-count', type=int, default=2000)

    def handle(self, *args, **options):
        """Generate demo studies and subjects unless such objects already exist.

        Raises:
            CommandError: if ``settings.PRODUCTION`` is truthy.
        """
        if settings.PRODUCTION:
            raise CommandError('Demo content not created in production environment.')

        # Each generator only runs while no object of its model exists yet.
        steps = (
            (Study, generate_studies, 'study_count'),
            (Subject, generate_subjects, 'subject_count'),
        )
        for model, generate, option in steps:
            if not model.objects.exists():
                generate(options[option])

269 if not Subject.objects.exists(): 

270 generate_subjects(options['subject_count'])