From 17424095da9051c7e894e09b71aeec9271846b09 Mon Sep 17 00:00:00 2001 From: Doug Bird Date: Wed, 13 Dec 2017 20:49:59 -0800 Subject: [PATCH] automated format generation --- .gitignore | 4 + assets/tld-desc.csv | 284 ++++++++++++++++++++++++++ bin/helpers/generate-js-tld-enum.js | 107 +++++++++- bin/helpers/generate-json-tld-enum.js | 98 ++++++++- bin/helpers/generate-php-tld-enum.php | 192 +++++++++++++++-- bin/helpers/generate-tlds-csv.js | 227 +++++++++++++++++++- bin/update-formats | 165 ++++++++++++++- package.json | 63 +++--- 8 files changed, 1084 insertions(+), 56 deletions(-) create mode 100644 assets/tld-desc.csv mode change 100644 => 100755 bin/helpers/generate-js-tld-enum.js mode change 100644 => 100755 bin/helpers/generate-json-tld-enum.js mode change 100644 => 100755 bin/helpers/generate-php-tld-enum.php mode change 100644 => 100755 bin/update-formats diff --git a/.gitignore b/.gitignore index 625b226..0a3d62d 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,7 @@ composer.lock *.log !**.gitkeep +/*-backup.csv +/formats/php/TldEnum/*-backup.php +/formats/json/*-backup.json +/formats/js/*-backup.js \ No newline at end of file diff --git a/assets/tld-desc.csv b/assets/tld-desc.csv new file mode 100644 index 0000000..b0ecec6 --- /dev/null +++ b/assets/tld-desc.csv @@ -0,0 +1,284 @@ +ac,Ascension Island +ad,Andorra (Principality of) +ae,United Arab Emirates +aero,Air-transport industry +af,Afghanistan (Islamic Republic of) +ag,Antigua and Barbuda +ai,Anguilla +al,Albania (Republic of) +am,Armenia (Republic of) +an,Netherlands Antilles +ao,Angola (Republic of) +aq,Antarctica +ar,Argentina (Argentine Republic) +arpa,Address and Routing Parameter Area +as,American Samoa +asia,Organisations and individuals in the Asia-Pacific region +at,Austria (Republic of) +au,Australia (Commonwealth of) +aw,Aruba +ax,Åland Islands +az,Azerbaijan (Republic of) +ba,Bosnia and Herzegovina +bb,Barbados +bd,Bangladesh (People's Republic of) +be,Belgium (Kingdom of) +bf,Burkina 
Faso +bg,Bulgaria (Republic of) +bh,Bahrain (Kingdom of) +bi,Burundi (Republic of) +biz,Business +bj,Benin (Republic of) +bl,Saint Barthélemy (Collectivity of) {unassigned - see also: .gp and .fr} +bm,Bermuda +bn,Brunei (Nation of Brunei - the Abode of Peace) [Negara Brunei Darussalam] +bo,Bolivia (Plurinational State of) +bq,Caribbean Netherlands [Bonaire - Sint Eustatius and Saba] {unassigned - see also: .an and .nl} +br,Brazil (Federative Republic of) +bs,Bahamas (Commonwealth of the) +bt,Bhutan (Kingdom of) +bv,Bouvet Island +bw,Botswana (Republic of) +by,Belarus (Republic of) +bz,Belize +ca,Canada +cat,Catalan +cc,Cocos (Keeling) Islands (Territory of the) +cd,Congo (Democratic Republic of the) [Congo-Kinshasa] +cf,Central African Republic +cg,Congo (Republic of) [Congo-Brazzaville] +ch,Switzerland (Swiss Confederation) +ci,Ivory Coast (Republic of Côte d'Ivoire) +ck,Cook Islands +cl,Chile (Republic of) +cm,Cameroon (Republic of) +cn,China (People's Republic of) +co,Colombia (Republic of) +com,Commercial organizations +coop,Cooperatives +cr,Costa Rica (Republic of) +cs,Czechoslovakia {formerly - retired 1995 - see also: .cz and .sk} +cu,Cuba (Republic of) +cv,Cape Verde (Republic of) +cw,Curaçao (Country of) +cx,Christmas Island (Territory of) +cy,Cyprus (Republic of) +cz,Czech Republic +dd,German Democratic Republic [East Germany] {formerly - retired} +de,Germany (Federal Republic of) +dj,Djibouti (Republic of) +dk,Denmark (Kingdom of) +dm,Dominica (Commonwealth of) +do,Dominican Republic +dz,Algeria (People's Democratic Republic of) +ec,Ecuador (Republic of) +edu,Educational establishments +ee,Estonia (Republic of) +eg,Egypt (Arab Republic of) +eh,Western Sahara {reserved} +er,Eritrea (State of) +es,Spain (Kingdom of) +et,Ethiopia (Federal Democratic Republic of) +eu,European Union +fi,Finland (Republic of) +fj,Fiji (Republic of) +fk,Falkland Islands (Malvinas) +fm,Micronesia (Federated States of) +fo,Faroe Islands +fr,France (French Republic) +ga,Gabon 
(Gabonese Republic) +gb,United Kingdom (United Kingdom of Great Britain and Northern Ireland) +gd,Grenada +ge,Georgia +gf,French Guiana +gg,Guernsey (Bailiwick of) +gh,Ghana (Republic of) +gi,Gibraltar +gl,Greenland +gm,Gambia (Republic of The) +gn,Guinea (Republic of) +gov,US government +gp,Guadeloupe +gq,Equatorial Guinea (Republic of) +gr,Greece (Hellenic Republic) +gs,South Georgia and the South Sandwich Islands +gt,Guatemala (Republic of) +gu,Guam +gw,Guinea-Bissau (Republic of) +gy,Guyana (Co-operative Republic of) +hk,Hong Kong (Hong Kong Special Administrative Region of the People's Republic of China) +hm,Heard Island and McDonald Islands +hn,Honduras (Republic of) +hr,Croatia (Republic of) +ht,Haiti (Republic of) +hu,Hungary +id,Indonesia (Republic of) +ie,Ireland (Republic of) +il,Israel (State of) +im,Isle of Man +in,India (Republic of) +info,Informational sites +int,International treaty-based organizations +io,British Indian Ocean Territory +iq,Iraq (Republic of) +ir,Iran (Islamic Republic of) +is,Iceland +it,Italy (Italian Republic) +je,Jersey (Bailiwick of) +jm,Jamaica (Commonwealth of) +jo,Jordan (Hashemite Kingdom of) +jobs,Employment-related sites +jp,Japan +ke,Kenya (Republic of) +kg,Kyrgyzstan (Kyrgyz Republic) +kh,Cambodia (Kingdom of) +ki,Kiribati (Republic of) +km,Comoros (Union of the) +kn,Saint Kitts and Nevis (Federation of) +kp,Korea (Democratic People's Republic of) [North Korea] +kr,Korea (Republic of) [South Korea] +kw,Kuwait (State of Kuwait) +ky,Cayman Islands +kz,Kazakhstan (Republic of) +la,Laos (Lao People's Democratic Republic) +lb,Lebanon (Lebanese Republic) +lc,Saint Lucia +li,Liechtenstein (Principality of) +lk,Sri Lanka (Democratic Socialist Republic of) +local,Pseudo-Domain for Multicast DNS +lr,Liberia (Republic of) +ls,Lesotho (Kingdom of) +lt,Lithuania (Republic of) +lu,Luxembourg (Grand Duchy of) +lv,Latvia (Republic of) +ly,Libya +ma,Morocco +mc,Monaco (Principality of) +md,Moldova (Republic of) +me,Montenegro +mf,Saint 
Martin (Collectivity of) {unassigned - see also: .gp and .fr} +mg,Madagascar (Republic of) +mh,Marshall Islands (Republic of the) +mil,US military +mk,Macedonia (Republic of) +ml,Mali (Republic of) +mm,Myanmar (Republic of the Union of) [Burma] +mn,Mongolia +mo,Macau (Macau Special Administrative Region of the People's Republic of China) [Macao] +mobi,Mobile +mp,Northern Mariana Islands (Commonwealth of the) +mq,Martinique +mr,Mauritania (Islamic Republic of) +ms,Montserrat +mt,Malta (Republic of) +mu,Mauritius (Republic of) +museum,Museums +mv,Maldives (Republic of) +mw,Malawi (Republic of) +mx,Mexico (United Mexican States) +my,Malaysia +mz,Mozambique (Republic of) +na,Namibia (Republic of) +name,Individuals +nato,NATO sites and operations {formerly - retired 1996 - never used} +nc,New Caledonia +ne,Niger (Republic of) +net,Network +nf,Norfolk Island (Territory of) +ng,Nigeria (Federal Republic of) +ni,Nicaragua (Republic of) +nl,Netherlands +no,Norway (Kingdom of) +np,Nepal (Federal Democratic Republic of) +nr,Nauru (Republic of) +nu,Niue +nz,New Zealand +om,Oman (Sultanate of) +onion,Pseudo-Domain for TOR (The Onion Router) +org,Non-profit organizations +pa,Panama (Republic of) +pe,Peru (Republic of) +pf,French Polynesia and Clipperton Island +pg,Papua New Guinea (Independent State of) +ph,Philippines (Republic of the) +pk,Pakistan (Islamic Republic of) +pl,Poland (Republic of) +pm,Saint Pierre and Miquelon +pn,Pitcairn Islands (Pitcairn - Henderson - Ducie and Oeno Islands) +pr,Puerto Rico (Commonwealth of) +pro,Profession +ps,Palestine (State of) +pt,Portugal (Portuguese Republic) +pw,Palau (Republic of) +py,Paraguay (Republic of) +qa,Qatar (State of) +re,Réunion +ro,Romania +rs,Serbia (Republic of) +ru,Russia (Russian Federation) +rw,Rwanda (Republic of) +sa,Saudi Arabia (Kingdom of) +sb,Solomon Islands +sc,Seychelles (Republic of) +sd,Sudan (Republic of) +se,Sweden (Kingdom of) +sg,Singapore (Republic of) +sh,Saint Helena +si,Slovenia (Republic of) 
+sj,Svalbard and Jan Mayen {not in use - see also: .no} +sk,Slovakia (Slovak Republic) +sl,Sierra Leone (Republic of) +sm,San Marino (Republic of) +sn,Senegal (Republic of) +so,Somalia (Federal Republic of) +sr,Suriname (Republic of) +ss,South Sudan (Republic of) +st,São Tomé and Príncipe (Democratic Republic of) +su,Soviet Union (Union of Soviet Socialist Republics) +sv,El Salvador (Republic of) +sx,Sint Maarten +sy,Syria (Syrian Arab Republic) +sz,Swaziland (Kingdom of) +tc,Turks and Caicos Islands +td,Chad (Republic of) +tel,Telephone +tf,French Southern and Antarctic Lands (Territory of the) +tg,Togo (Togolese Republic) +th,Thailand (Kingdom of) +tj,Tajikistan (Republic of) +tk,Tokelau +tl,Timor-Leste (Democratic Republic of) [East Timor] +tm,Turkmenistan +tn,Tunisia (Republic of) +to,Tonga (Kingdom of) +tp,Timor-Leste (Democratic Republic of) [East Timor] {being phased out - also see: .tl} +tr,Turkey (Republic of) +travel,Travel +tt,Trinidad and Tobago (Republic of) +tv,Tuvalu +tw,Taiwan (Republic of China) +tz,Tanzania (United Republic of) +ua,Ukraine +ug,Uganda (Republic of) +uk,United Kingdom (United Kingdom of Great Britain and Northern Ireland) +um,United States Minor Outlying Islands {formerly - retired 2010 - see also: .us} +us,United States of America and United States Minor Outlying Islands +uy,Uruguay (Oriental Republic of) +uz,Uzbekistan (Republic of) +va,Vatican City (Vatican City State) +vc,Saint Vincent and the Grenadines +ve,Venezuela (Bolivarian Republic of) +vg,British Virgin Islands (Virgin Islands) +vi,United States Virgin Islands (United States Virgin Islands) +vn,Vietnam (Socialist Republic of) +vu,Vanuatu (Republic of) +wf,Wallis and Futuna (Territory of the Wallis and Futuna Islands) +ws,Samoa (Independent State of) +xxx,Adult entertainment +ye,Yemen (Republic of) +yt,Mayotte (Department of) +yu,Yugoslavia and Serbia and Montenegro {formerly - retired 2010 - see also: .me and .rs} +za,South Africa (Republic of) +zm,Zambia (Republic of) 
+zr,Zaire (Republic of) {formerly - retired 2001 - see also: .cd}
+zw,Zimbabwe (Republic of)
diff --git a/bin/helpers/generate-js-tld-enum.js b/bin/helpers/generate-js-tld-enum.js
old mode 100644
new mode 100755
index 36c686e..c1ae8f6
--- a/bin/helpers/generate-js-tld-enum.js
+++ b/bin/helpers/generate-js-tld-enum.js
@@ -1,7 +1,123 @@
 #!/usr/bin/env node
 
-console.log("generates the 'tld-enum.js' node source file from the 'tlds.csv' csv file");
-console.log("---this script is currently a 'to-do' placeholder!---");
-console.error("terminating... the development for the 'JSON' format generator has not yet been completed");
+const meName = 'generate-js-tld-enum.js';
 
-process.exit(1);
\ No newline at end of file
+// Any promise rejection that escapes the async main routine below is fatal.
+process.on('unhandledRejection', error => {
+  console.error(meName + ": (FATAL)", error);
+  process.exit(1);
+});
+
+const countries = require('country-data').countries;
+const country = require('countryjs');
+const parse = require('csv-parse');
+const fs = require('fs-extra');
+const path = require('path');
+const md5File = require('md5-file/promise');
+const pathinfo = require('pathinfo');
+const program = require('commander');
+const tmp = require('tmp');
+
+// Have 'tmp' remove the temporary working directory when the process exits.
+tmp.setGracefulCleanup();
+
+const fileTldListJs = path.dirname(require.main.filename) + '/../../formats/js/tld-enum.js';
+const fileTldsCsv = path.dirname(require.main.filename) + '/../../tlds.csv';
+
+program
+  .option('-q, --quiet', 'Quiet Mode')
+  .parse(process.argv);
+
+if (!program.quiet) {
+  console.log(meName);
+  console.log(" (c) 2017 Doug Bird, All Rights Reserved.");
+  console.log(" see README.md for licensing and other information");
+  console.log(" https://github.com/katmore/tld-enum#readme");
+  console.log("");
+  console.log(" Generates new javascript format files from the 'tlds.csv' file");
+  console.log("");
+}
+
+/**
+ * Parses a CSV file and resolves with all of its rows.
+ *
+ * The csv-parse stream hands over records in 'readable' events, which are
+ * emitted asynchronously; the rows are only complete once 'end' has fired.
+ *
+ * @param {string} file path of the CSV file to read
+ * @returns {Promise<Array>} one array of column values per CSV row
+ */
+function readCsvRows(file) {
+  return new Promise((resolve, reject) => {
+    const rows = [];
+    const parser = parse({ delimiter: ',' });
+    parser.on('readable', () => {
+      let row;
+      while ((row = parser.read()) !== null) {
+        rows.push(row);
+      }
+    });
+    parser.on('error', reject);
+    parser.on('end', () => resolve(rows));
+    parser.write(fs.readFileSync(file));
+    parser.end();
+  });
+}
+
+(async() => {
+
+  const tldEnumStartTldList = 'exports.tldList = ';
+  const tldEnumEndTldList = ';';
+
+  // unsafeCleanup lets 'tmp' remove the directory even though it is not empty.
+  const tmpDir = tmp.dirSync({ unsafeCleanup: true });
+
+  const fileNewTldListJs = tmpDir.name + '/tld-enum.js';
+
+  let existingMd5 = null;
+
+  // Keep a backup of the current file, named after its md5 checksum.
+  if (fs.existsSync(fileTldListJs)) {
+    existingMd5 = await md5File(fileTldListJs);
+    const pathinfoTlds = pathinfo(fileTldListJs);
+    const fileBackupTlds = pathinfoTlds.dirname + pathinfoTlds.sep + pathinfoTlds.basename + '-' + existingMd5 + '-backup.js';
+    if (!fs.existsSync(fileBackupTlds)) {
+      fs.copySync(fileTldListJs, fileBackupTlds);
+    }
+  }
+
+  process.stdout.write("reading 'tlds.csv'...");
+
+  // Wait until the whole file has been parsed before the list is serialized.
+  const rows = await readCsvRows(fileTldsCsv);
+
+  // The first column of each row is the TLD.
+  const tldEnum = rows.map((row, i) => {
+    if (!row.length) {
+      console.error(meName + ": (FATAL) invalid 'tlds.csv' row #" + i + ": " + fileTldsCsv);
+      process.exit(1);
+    }
+    return row[0];
+  });
+
+  console.log("done");
+
+  process.stdout.write("generating new 'tld-enum.js' file...");
+
+  fs.writeFileSync(fileNewTldListJs, tldEnumStartTldList + JSON.stringify(tldEnum, null, 2) + tldEnumEndTldList);
+
+  console.log("done");
+
+  // Leave the existing file untouched when the new one is identical.
+  if (existingMd5) {
+    const newMd5 = await md5File(fileNewTldListJs);
+    if (newMd5 === existingMd5) {
+      console.error(meName + ": (NOTICE) ignoring newly generated 'tld-enum.js' file that is identical to the existing file (md5: " + existingMd5 + ", path: " + fileTldListJs + ")");
+      return;
+    }
+  }
+  fs.copySync(fileNewTldListJs, fileTldListJs);
+
+  console.log("saved new 'tld-enum.js' file");
+
+})();
\ No newline at end of file
diff --git a/bin/helpers/generate-json-tld-enum.js b/bin/helpers/generate-json-tld-enum.js
old mode 100644
new mode 100755
index 9512af4..71ca7e9
--- a/bin/helpers/generate-json-tld-enum.js
+++ b/bin/helpers/generate-json-tld-enum.js
@@ -1,7 +1,119 @@
 #!/usr/bin/env node
 
-console.log("generates the 'tld-list.json' JSON array file from the 'tlds.csv' csv file");
-console.log("---this script is currently a 'to-do' placeholder!---");
-console.error("terminating... the development for the 'JSON' format generator has not yet been completed");
+const meName = 'generate-json-tld-enum.js';
 
-process.exit(1);
\ No newline at end of file
+// Any promise rejection that escapes the async main routine below is fatal.
+process.on('unhandledRejection', error => {
+  console.error(meName + ": (FATAL)", error);
+  process.exit(1);
+});
+
+const countries = require('country-data').countries;
+const country = require('countryjs');
+const parse = require('csv-parse');
+const fs = require('fs-extra');
+const path = require('path');
+const md5File = require('md5-file/promise');
+const pathinfo = require('pathinfo');
+const program = require('commander');
+const tmp = require('tmp');
+
+// Have 'tmp' remove the temporary working directory when the process exits.
+tmp.setGracefulCleanup();
+
+const fileTldListJson = path.dirname(require.main.filename) + '/../../formats/json/tld-list.json';
+const fileTldsCsv = path.dirname(require.main.filename) + '/../../tlds.csv';
+
+program
+  .option('-q, --quiet', 'Quiet Mode')
+  .parse(process.argv);
+
+if (!program.quiet) {
+  console.log(meName);
+  console.log(" (c) 2017 Doug Bird, All Rights Reserved.");
+  console.log(" see README.md for licensing and other information");
+  console.log(" https://github.com/katmore/tld-enum#readme");
+  console.log("");
+  console.log(" Generates new JSON format files from the 'tlds.csv' file");
+  console.log("");
+}
+
+/**
+ * Parses a CSV file and resolves with all of its rows.
+ *
+ * The csv-parse stream hands over records in 'readable' events, which are
+ * emitted asynchronously; the rows are only complete once 'end' has fired.
+ *
+ * @param {string} file path of the CSV file to read
+ * @returns {Promise<Array>} one array of column values per CSV row
+ */
+function readCsvRows(file) {
+  return new Promise((resolve, reject) => {
+    const rows = [];
+    const parser = parse({ delimiter: ',' });
+    parser.on('readable', () => {
+      let row;
+      while ((row = parser.read()) !== null) {
+        rows.push(row);
+      }
+    });
+    parser.on('error', reject);
+    parser.on('end', () => resolve(rows));
+    parser.write(fs.readFileSync(file));
+    parser.end();
+  });
+}
+
+(async() => {
+
+  // unsafeCleanup lets 'tmp' remove the directory even though it is not empty.
+  const tmpDir = tmp.dirSync({ unsafeCleanup: true });
+
+  const fileNewTldListJson = tmpDir.name + '/tld-list.json';
+
+  let existingMd5 = null;
+
+  // Keep a backup of the current file, named after its md5 checksum.
+  if (fs.existsSync(fileTldListJson)) {
+    existingMd5 = await md5File(fileTldListJson);
+    const pathinfoTlds = pathinfo(fileTldListJson);
+    const fileBackupTlds = pathinfoTlds.dirname + pathinfoTlds.sep + pathinfoTlds.basename + '-' + existingMd5 + '-backup.json';
+    if (!fs.existsSync(fileBackupTlds)) {
+      fs.copySync(fileTldListJson, fileBackupTlds);
+    }
+  }
+
+  process.stdout.write("reading 'tlds.csv'...");
+
+  // Wait until the whole file has been parsed before the list is serialized.
+  const rows = await readCsvRows(fileTldsCsv);
+
+  // The first column of each row is the TLD.
+  const tldEnum = rows.map((row, i) => {
+    if (!row.length) {
+      console.error(meName + ": (FATAL) invalid 'tlds.csv' row #" + i + ": " + fileTldsCsv);
+      process.exit(1);
+    }
+    return row[0];
+  });
+
+  console.log("done");
+
+  process.stdout.write("generating new 'tld-list.json' file...");
+
+  fs.writeFileSync(fileNewTldListJson, JSON.stringify(tldEnum, null, 2));
+
+  console.log("done");
+
+  // Leave the existing file untouched when the new one is identical.
+  if (existingMd5) {
+    const newMd5 = await md5File(fileNewTldListJson);
+    if (newMd5 === existingMd5) {
+      console.error(meName + ": (NOTICE) ignoring newly generated 'tld-list.json' file that is identical to the existing file (md5: " + existingMd5 + ", path: " + fileTldListJson + ")");
+      return;
+    }
+  }
+  fs.copySync(fileNewTldListJson, fileTldListJson);
+  console.log("saved new 'tld-list.json' file");
+
+})();
\ No newline at end of file
diff --git a/bin/helpers/generate-php-tld-enum.php b/bin/helpers/generate-php-tld-enum.php old mode 100644 new mode 100755 index bc7e644..ab5ddd7 --- a/bin/helpers/generate-php-tld-enum.php +++ b/bin/helpers/generate-php-tld-enum.php @@ -1,17 +1,179 @@ #!/usr/bin/env php ','',$tldEnumExport); + $tldEnumExport = preg_replace('/[0-9]+/', '', $tldEnumExport); + + //$tldEnumExport = json_encode($tldEnum,\JSON_PRETTY_PRINT); + + if (false === file_put_contents($newTldEnumFile, $tldEnumExport,\FILE_APPEND)) { + static::_echo_error("(FATAL) failed to write to new 'TldEnum.php' file",1); + } + + if (false === file_put_contents($newTldEnumFile,static::TLD_ENUM_SOURCE_END_TLD_ENUM_CONST,\FILE_APPEND)) { + static::_echo_error("(FATAL) failed to write to new 'TldEnum.php' file",1); + } + + if (false === file_put_contents($newTldEnumFile,static::TLD_ENUM_SOURCE_END_CLASS,\FILE_APPEND)) { + static::_echo_error("(FATAL) failed to write to new 'TldEnum.php' file",1); + } +
echo "done\n"; + + if ($existingMd5!==null) { + $newTldEnumMd5 = md5_file($newTldEnumFile); + if ($existingMd5 == $newTldEnumMd5) { + static::_echo_error("(NOTICE) ignoring newly generated 'TldEnum.php' file that is identical to the existing file (md5: $existingMd5, path: $tldEnumFile)"); + return; + } + if (!unlink($tldEnumFile)) { + static::_echo_error("(FATAL) failed to remove stale 'TldEnum.php': $tldEnumFile",1); + } + } + + if (!copy($newTldEnumFile,$tldEnumFile)) { + static::_echo_error("(FATAL) failed to save new 'TldEnum.php': $tldEnumFile",1); + } + + echo "saved new 'TldEnum.php' file\n"; + + + } + + private static function _echo_error(string $str, int $fatal_exit_status=null) : void { + if (substr($str,0,1)!=="\n") { + $str .= "\n"; + } + $str = static::ME_NAME . ": ".$str; + if (\PHP_SAPI=='cli') { + fwrite(\STDERR,$str); + } else { + echo $str ; + } + if (is_int($fatal_exit_status)) { + exit($fatal_exit_status); + } + } + + const TLD_ENUM_SOURCE_START_CLASS = << { + console.error(meName + ": (FATAL)", error); + process.exit(1); +});
+
+const request = require('async-request');
+const cheerio = require('cheerio');
+const countries = require('country-data').countries;
+const country = require('countryjs');
+const stringify = require('csv-stringify');
+const parse = require('csv-parse');
+const fs = require('fs-extra');
+const path = require('path');
+const md5File = require('md5-file/promise');
+const pathinfo = require('pathinfo');
+const program = require('commander');
+const tmp = require('tmp');
+
+// Have 'tmp' remove the temporary working directory when the process exits.
+tmp.setGracefulCleanup();
+
+const fileTldDescCsv = path.dirname(require.main.filename) + '/../../assets/tld-desc.csv';
+const fileTldsCsv = path.dirname(require.main.filename) + '/../../tlds.csv';
+const urlTldsAlpha = 'http://data.iana.org/TLD/tlds-alpha-by-domain.txt';
+const urlDomainsDb = 'https://www.iana.org/domains/root/db';
+
+program
+  .option('-q, --quiet', 'Quiet Mode')
+  .parse(process.argv);
+
+if (!program.quiet) {
+  console.log(meName);
+  console.log(" (c) 2017 Doug Bird, All Rights Reserved.");
+  console.log(" see README.md for licensing and other information");
+  console.log(" https://github.com/katmore/tld-enum#readme");
+  console.log("");
+  console.log(" Generates the canonical 'tlds.csv' csv file by downloading resources from iana.org");
+  console.log("");
+}
+
+/**
+ * Downloads a URL and returns the response body.
+ *
+ * Progress is written to stdout. A status code other than 200 or an empty
+ * body is reported on stderr and ends the process with exit status 1.
+ *
+ * @param {string} url address to download
+ * @returns {Promise<*>} the 'body' property of the response
+ */
+async function download(url) {
+  process.stdout.write("downloading '" + url + "'...");
+  const response = await request(url);
+  if (response.statusCode !== 200) {
+    console.log("error");
+    console.error(meName + ": (FATAL) response status code " + response.statusCode + " from URL '" + url + "'");
+    process.exit(1);
+  }
+  if (!response.body) {
+    console.log("error");
+    console.error(meName + ": (FATAL) empty response body " + response.statusCode + " from URL '" + url + "'");
+    process.exit(1);
+  }
+  console.log('success');
+  return response.body;
+}
+
+/**
+ * Parses a CSV file and resolves with all of its rows.
+ *
+ * The csv-parse stream hands over records in 'readable' events, which are
+ * emitted asynchronously; the rows are only complete once 'end' has fired.
+ *
+ * @param {string} file path of the CSV file to read
+ * @returns {Promise<Array>} one array of column values per CSV row
+ */
+function readCsvRows(file) {
+  return new Promise((resolve, reject) => {
+    const rows = [];
+    const parser = parse({ delimiter: ',' });
+    parser.on('readable', () => {
+      let row;
+      while ((row = parser.read()) !== null) {
+        rows.push(row);
+      }
+    });
+    parser.on('error', reject);
+    parser.on('end', () => resolve(rows));
+    parser.write(fs.readFileSync(file));
+    parser.end();
+  });
+}
+
+/**
+ * Appends rows to a file as CSV and resolves once every row has been written.
+ *
+ * The csv-stringify stream emits its output asynchronously, so callers must
+ * wait for the returned promise before reading or copying the file.
+ *
+ * @param {string} file path of the file to append to
+ * @param {Array} rows one array of column values per CSV row
+ * @returns {Promise<void>}
+ */
+function appendCsvRows(file, rows) {
+  return new Promise((resolve, reject) => {
+    const stringifier = stringify({ delimiter: ',' });
+    stringifier.on('readable', () => {
+      let chunk;
+      while ((chunk = stringifier.read()) !== null) {
+        fs.appendFileSync(file, chunk, 'utf8');
+      }
+    });
+    stringifier.on('error', reject);
+    stringifier.on('end', resolve);
+    rows.forEach((row) => stringifier.write(row));
+    stringifier.end();
+  });
+}
+
+(async() => {
+
+  const tmpDir = tmp.dirSync({ unsafeCleanup: true });
+
+  const fileTldsAlphaTxt = tmpDir.name + '/tlds-alpha-by-domain.txt';
+  const fileNewTldsCsv = tmpDir.name + '/tlds.csv';
+
+  // The plain-text list is only stored in the temporary directory; the rows
+  // of the new 'tlds.csv' are taken from the root zone database page below.
+  fs.writeFileSync(fileTldsAlphaTxt, await download(urlTldsAlpha), 'utf8');
+  fs.writeFileSync(fileNewTldsCsv, '', 'utf8');
+
+  const $ = cheerio.load(await download(urlDomainsDb));
+
+  process.stdout.write("building country / TLD hashmap...");
+
+  // NOTE(review): the keys are used exactly as countryjs returns them, while
+  // the lookups below use domains with dots stripped - confirm both agree.
+  const tld2CountryName = {};
+  countries.all.forEach((c) => {
+    const tld = country.tld(c.alpha3, 'ISO3');
+    if (tld) {
+      tld2CountryName[tld] = c.name;
+    }
+  });
+
+  console.log('done');
+
+  process.stdout.write("building description / TLD hashmap...");
+
+  // Column 0 is the domain, column 1 its description; incomplete rows are skipped.
+  const tld2Desc = {};
+  (await readCsvRows(fileTldDescCsv)).forEach((tldData) => {
+    const domain = tldData[0];
+    const description = tldData[1];
+    if (domain && description) {
+      tld2Desc[domain] = description;
+    }
+  });
+
+  console.log("done");
+
+  // Position of each property within the cells of a '#tld-table' row.
+  const tdPosMap = {
+    domain: 0,
+    type: 1,
+    manager: 2,
+  };
+
+  const tldSet = [];
+
+  process.stdout.write("parsing IANA data...");
+  $('#tld-table').find('tr').each((i, element) => {
+    const tld = {
+      domain: null,
+      type: null,
+      manager: null,
+    };
+    const tldData = [];
+    $(element).find("td").each((iTd, elementTd) => {
+      tldData.push($(elementTd).text());
+    });
+
+    Object.keys(tld).forEach((prop) => {
+      if (typeof tldData[tdPosMap[prop]] !== 'undefined') {
+        tld[prop] = tldData[tdPosMap[prop]];
+      }
+    });
+
+    // Rows without text in the first cell carry no domain.
+    if (!tld.domain) {
+      return;
+    }
+
+    // Strip whitespace and dots from the domain cell.
+    tld.domain = tld.domain.replace(/\s/g, '').replace(/\./g, '');
+
+    tldSet.push(tld);
+  });
+
+  // Never replace 'tlds.csv' with an empty file when no row could be parsed.
+  if (!tldSet.length) {
+    console.log('error');
+    console.error(meName + ": (FATAL) no TLD rows found in '#tld-table' from URL '" + urlDomainsDb + "'");
+    process.exit(1);
+  }
+  console.log('done');
+
+  process.stdout.write("serializing new 'tlds.csv'...");
+  const csvRows = tldSet.map((tld) => {
+    let description;
+    if ((tld.type === 'country-code') && (typeof tld2CountryName[tld.domain] !== 'undefined')) {
+      description = tld2CountryName[tld.domain];
+    } else if (typeof tld2Desc[tld.domain] !== 'undefined') {
+      description = tld2Desc[tld.domain];
+    } else {
+      description = tld.manager;
+    }
+    return [tld.domain, description, tld.type];
+  });
+  // Wait for the complete file before it is checksummed or copied.
+  await appendCsvRows(fileNewTldsCsv, csvRows);
+  console.log('done');
+
+  if (fs.existsSync(fileTldsCsv)) {
+    const newMd5 = await md5File(fileNewTldsCsv);
+    const csvMd5 = await md5File(fileTldsCsv);
+    if (csvMd5 === newMd5) {
+      console.error(meName + ": (NOTICE) ignoring newly generated 'tlds.csv' file that is identical to the existing file (md5: " + csvMd5 + ", path: " + fileTldsCsv + ")");
+      return;
+    }
+    // Keep a backup of the current file, named after its md5 checksum.
+    const pathinfoTldsCsv = pathinfo(fileTldsCsv);
+    const fileBackupTldsCsv = pathinfoTldsCsv.dirname + pathinfoTldsCsv.sep + pathinfoTldsCsv.basename + '-' + csvMd5 + '-backup.csv';
+    if (!fs.existsSync(fileBackupTldsCsv)) {
+      fs.copySync(fileTldsCsv, fileBackupTldsCsv);
+    }
+  }
+
+  process.stdout.write("saving new 'tlds.csv'...");
+  fs.copySync(fileNewTldsCsv, fileTldsCsv);
+  console.log('done');
+
+})();
\ No newline at end of file
diff --git a/bin/update-formats b/bin/update-formats
old mode 100644
new mode 100755
index b4d89c6..a3dc0fc
--- a/bin/update-formats
+++ b/bin/update-formats
@@ -1,4 +1,161 @@
-#!/usr/bin/env node
-
-console.log("updates the 'tlds.csv' file from iana.org and re-generates the native format files in the 'format/' directory");
-console.log("---this script is currently a 'to-do' placeholder!---");
\ No newline at end of file
+#!/bin/bash
+################################################################################
+## "update-formats"
+## Updates the 'tlds.csv' file from iana.org and re-generates the native
+## format files
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+################################################################################
+## script localization
+################################################################################
+ME_NAME="update-formats"
+ME_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+HELPER_DIR="$ME_DIR/helpers"
+################################################################################
+## enforce dependencies
+################################################################################
+DEPENDENCY_SET=(node php)
+DEPENDENCY_STATUS=0
+for DEP_CMD in "${DEPENDENCY_SET[@]}"
+do
+  command -v "$DEP_CMD" > /dev/null 2>&1 # shell builtin; no reliance on an external 'which'
+  DEP_STATUS=$?
+  if [ "$DEP_STATUS" -ne "0" ]; then
+    >&2 echo -e "$ME_NAME: (NOTICE) failed dependency check for '$DEP_CMD', command is missing or inaccessible"
+    DEPENDENCY_STATUS=1
+  fi
+done
+if [ "$DEPENDENCY_STATUS" -ne "0" ]; then
+  >&2 echo -e "$ME_NAME: (FATAL) one or more dependency checks failed"
+  exit 1
+fi
+################################################################################
+## read options and flags
+################################################################################
+QUIET_MODE=0
+typeset -A SCRIPT_OPTS_MAP
+SCRIPT_OPTS_MAP=(
+  [quiet]=q
+)
+SCRIPT_OPTS="::-:q"
+#== parse options ==#
+while getopts ${SCRIPT_OPTS} OPTION ; do
+  #== translate long options to short ==#
+  OPTREF="-$OPTARG"
+  if [[ "x$OPTION" == "x-" ]]; then
+    LONG_OPTION=$OPTARG
+    LONG_OPTARG=$(echo $LONG_OPTION | grep "=" | cut -d'=' -f2)
+    LONG_OPTIND=-1
+    [[ "x$LONG_OPTARG" = "x" ]] && LONG_OPTIND=$OPTIND || LONG_OPTION=$(echo $OPTARG | cut -d'=' -f1)
+    [[ $LONG_OPTIND -ne -1 ]] && eval LONG_OPTARG="\$$LONG_OPTIND"
+    OPTION=${SCRIPT_OPTS_MAP[$LONG_OPTION]}
+    [[ "x$OPTION" = "x" ]] && OPTION="?" OPTARG="-$LONG_OPTION"
+    OPTREF="--$LONG_OPTION"
+    if [[ $( echo "${SCRIPT_OPTS}" | grep -c "${OPTION}:" ) -eq 1 ]]; then
+      if [[ "x${LONG_OPTARG}" = "x" ]] || [[ "${LONG_OPTARG}" = -* ]]; then
+        OPTION=":" OPTARG="-$LONG_OPTION"
+      else
+        OPTARG="$LONG_OPTARG";
+        if [[ $LONG_OPTIND -ne -1 ]]; then
+          [[ $OPTIND -le $Optnum ]] && OPTIND=$(( $OPTIND+1 )) # NOTE(review): Optnum is never assigned in this script
+          shift $OPTIND
+          OPTIND=1
+        fi
+      fi
+    fi
+  fi
+
+  #== options follow by another option instead of argument ==#
+  if [[ "x${OPTION}" != "x:" ]] && [[ "x${OPTION}" != "x?" ]] && [[ "${OPTARG}" = -* ]]; then
+    OPTARG="$OPTION" OPTION=":"
+  fi
+
+  #== manage options ==#
+  case "$OPTION" in
+    q)
+      QUIET_MODE=1
+    ;;
+    : ) >&2 echo "${ME_NAME}: (FATAL) $OPTREF option requires a value" && >&2 echo "usage: ${ME_NAME} [-q|--quiet]" && exit 1 ;;
+    ? ) >&2 echo "${ME_NAME}: (FATAL) $OPTREF is an unknown option" && >&2 echo "usage: ${ME_NAME} [-q|--quiet]" && exit 1 ;;
+  esac
+done
+shift $((${OPTIND} - 1))
+################################################################################
+## display welcome message
+################################################################################
+if [ "$QUIET_MODE" -ne "1" ]; then
+  echo "update-formats"
+  echo " (c) 2017 Doug Bird, All Rights Reserved."
+  echo " see README.md for licensing and other information"
+  echo " https://github.com/katmore/tld-enum#readme"
+  echo ""
+  echo " Updates the 'tlds.csv' file from iana.org and re-generates the native format files"
+  echo ""
+fi
+
+################################################################################
+## run the 'generate-tlds-csv.js' helper
+################################################################################
+CUR_HELPER_LABEL="'tlds.csv'" # the messages add the word "new" themselves
+CUR_HELPER_CMD=generate-tlds-csv.js
+echo -e "generate new $CUR_HELPER_LABEL: started\n"
+"$HELPER_DIR/$CUR_HELPER_CMD" -q
+CMD_STATUS=$?
+# terminate if helper failed
+if [ "$CMD_STATUS" -ne "0" ]; then
+  >&2 echo "$ME_NAME: (FATAL) helper for $CUR_HELPER_LABEL failed ($CUR_HELPER_CMD exit status $CMD_STATUS)"
+  exit $CMD_STATUS
+fi
+echo -e "\ngenerate new $CUR_HELPER_LABEL: success"
+
+################################################################################
+## run the 'generate-php-tld-enum.php' helper
+################################################################################
+CUR_HELPER_LABEL="PHP format files"
+CUR_HELPER_CMD=generate-php-tld-enum.php
+echo -e "generate new $CUR_HELPER_LABEL: started\n"
+"$HELPER_DIR/$CUR_HELPER_CMD" -q
+CMD_STATUS=$?
+# terminate if helper failed
+if [ "$CMD_STATUS" -ne "0" ]; then
+  >&2 echo "$ME_NAME: (FATAL) helper for $CUR_HELPER_LABEL failed ($CUR_HELPER_CMD exit status $CMD_STATUS)"
+  exit $CMD_STATUS
+fi
+echo -e "\ngenerate new $CUR_HELPER_LABEL: success"
+
+################################################################################
+## run the 'generate-js-tld-enum.js' helper
+################################################################################
+CUR_HELPER_LABEL="JavaScript format files"
+CUR_HELPER_CMD=generate-js-tld-enum.js
+echo -e "generate new $CUR_HELPER_LABEL: started\n"
+"$HELPER_DIR/$CUR_HELPER_CMD" -q
+CMD_STATUS=$?
+# terminate if helper failed
+if [ "$CMD_STATUS" -ne "0" ]; then
+  >&2 echo "$ME_NAME: (FATAL) helper for $CUR_HELPER_LABEL failed ($CUR_HELPER_CMD exit status $CMD_STATUS)"
+  exit $CMD_STATUS
+fi
+echo -e "\ngenerate new $CUR_HELPER_LABEL: success"
+
+################################################################################
+## run the 'generate-json-tld-enum.js' helper
+################################################################################
+CUR_HELPER_LABEL="JSON format files"
+CUR_HELPER_CMD=generate-json-tld-enum.js
+echo -e "generate new $CUR_HELPER_LABEL: started\n"
+"$HELPER_DIR/$CUR_HELPER_CMD" -q
+CMD_STATUS=$?
+# terminate if helper failed +if [ "$CMD_STATUS" -ne "0" ]; then + >&2 echo "$ME_NAME: (FATAL) helper for $CUR_HELPER_LABEL failed ($CUR_HELPER_CMD exit status $CMD_STATUS)" + exit $CMD_STATUS +fi +echo -e "\ngenerate new $CUR_HELPER_LABEL: success" \ No newline at end of file diff --git a/package.json b/package.json index 89a027f..2fefe84 100644 --- a/package.json +++ b/package.json @@ -1,27 +1,38 @@ { - "name": "tld-enum", - "version": "1.0.4", - "description": "Lists of every ICANN TLD in formats that can be natively compiled in various language targets", - "main": "formats/js/tld-enum.js", - "bin": { - "tld-enum": "generate-json-tld-enum.js" - }, - "scripts": { - "test": "echo \"Error: no test specified\" && exit 1" - }, - "repository": { - "type": "git", - "url": "git+https://github.com/katmore/tld-enum.git" - }, - "author": "Doug Bird", - "license": "MIT", - "bugs": { - "url": "https://github.com/katmore/tld-enum/issues" - }, - "homepage": "https://github.com/katmore/tld-enum#readme", - "dependencies": { - "async-request": "^1.2.0", - "cheerio": "^1.0.0-rc.2", - "request": "^2.83.0" - } -} \ No newline at end of file + "name": "tld-enum", + "version": "1.0.4", + "description": "Lists of every ICANN TLD in formats that can be natively compiled in various language targets", + "main": "formats/js/tld-enum.js", + "bin": { + "tld-enum": "generate-json-tld-enum.js" + }, + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/katmore/tld-enum.git" + }, + "author": "Doug Bird", + "license": "MIT", + "bugs": { + "url": "https://github.com/katmore/tld-enum/issues" + }, + "homepage": "https://github.com/katmore/tld-enum#readme", + "dependencies": { + "async-request": "^1.2.0", + "cheerio": "^1.0.0-rc.2", + "commander": "^2.12.2", + "country-data": "0.0.31", + "countryjs": "^1.8.0", + "csv-parse": "^2.0.0", + "csv-stringify": "^2.0.0", + "es6-promisify": "^5.0.0", + "fs": 
"0.0.1-security", + "fs-extra": "^5.0.0", + "md5-file": "^3.2.3", + "pathinfo": "^0.1.0", + "request": "^2.83.0", + "tmp": "0.0.33" + } +}