#!/usr/bin/env python
# -*- coding: utf-8 -*-
# gen_tld.py
import os
import sys
  4. def main():
  5. alphabet = 'abcdefghijklmnopqrstuvwxyz'
  6. tlds = dict()
  7. for s in alphabet:
  8. tlds[s] = list()
  9. tlds['xn--'] = list()
  10. tld_file = open(sys.argv[1], 'r')
  11. for line in tld_file.readlines():
  12. domain = line.strip().lower()
  13. for label in tlds:
  14. if domain.startswith('xn--'):
  15. tlds['xn--'].append(domain)
  16. break
  17. elif domain.startswith('x'):
  18. tlds['x'].append(domain)
  19. break
  20. else:
  21. if domain.startswith(label):
  22. tlds[label].append(domain)
  23. break
  24. print '// actual list can be found at http://data.iana.org/TLD/tlds-alpha-by-domain.txt'
  25. print 'static const char* const TopLevelDomains[] = {'
  26. for label, value in sorted(tlds.iteritems()):
  27. if label == 'xn--':
  28. sys.stdout.write(' /* ')
  29. str = ''
  30. for n in value:
  31. unicode_domain = n.decode('idna').encode('utf-8')
  32. str += ('%s, ' % unicode_domain)
  33. sys.stdout.write('%s*/\n' % str.rstrip())
  34. sys.stdout.write(' ')
  35. str = ''
  36. for n in value:
  37. str += ('"%s", ' % n)
  38. sys.stdout.write('%s\n' % str.rstrip())
  39. else:
  40. sys.stdout.write(' ')
  41. str = ''
  42. for n in value:
  43. str += ('"%s", ' % n)
  44. sys.stdout.write('%s\n' % str.rstrip())
  45. print ' 0'
  46. print '};'
  47. if __name__ == '__main__':
  48. main()