-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathsmall-result-python.txt
More file actions
1 lines (1 loc) · 34.9 KB
/
small-result-python.txt
File metadata and controls
1 lines (1 loc) · 34.9 KB
1
{'Apache': 79, 'Hadoop': 201, 'From': 3, 'Wikipedia,': 1, 'the': 211, 'free': 1, 'encyclopedia': 1, 'Jump': 2, 'to': 122, 'navigation': 1, 'search': 4, 'This': 17, 'article': 5, 'has': 14, 'multiple': 11, 'issues.': 1, 'Please': 2, 'help': 2, 'improve': 2, 'it': 14, 'or': 20, 'discuss': 1, 'these': 4, 'issues': 2, 'on': 65, 'talk': 2, 'page.': 1, '(Learn': 1, 'how': 1, 'and': 129, 'when': 4, 'remove': 1, 'template': 1, 'messages)': 1, 'contains': 6, 'content': 2, 'that': 38, 'is': 66, 'written': 4, 'like': 1, 'an': 17, 'advertisement.': 1, '(October': 2, '2013)': 2, 'appears': 1, 'contain': 1, 'a': 106, 'large': 4, 'number': 11, 'of': 138, 'buzzwords.': 1, 'may': 3, 'be': 24, 'too': 1, 'technical': 2, 'for': 60, 'most': 1, 'readers': 1, 'understand.': 1, 'make': 1, 'understandable': 1, 'non-experts,': 1, 'without': 4, 'removing': 1, 'details.': 1, '(May': 1, '2017)': 1, 'logo': 1, 'new.svg': 1, 'Developer(s)': 1, 'Software': 12, 'Foundation': 5, 'Initial': 1, 'release': 3, 'December': 24, '10,': 1, '2011;': 1, '6': 3, 'years': 2, 'ago[1]': 1, 'Stable': 1, '3.0.0': 1, '/': 2, '13,': 1, '2017[2]': 1, 'Repository': 1, 'https://git-wip-us.apache.org/repos/asf/hadoop.git': 1, 'Edit': 5, 'this': 13, 'at': 19, 'Wikidata': 4, 'Written': 1, 'in': 68, 'Java': 11, 'Operating': 2, 'system': 30, 'Cross-platform': 1, 'Type': 1, 'Distributed': 11, 'file': 40, 'License': 2, '2.0': 3, 'Website': 1, 'hadoop.apache.org': 1, '(': 1, '/həˈduːp/)': 1, 'collection': 3, 'open-source': 3, 'software': 7, 'utilities': 3, 'facilitate': 1, 'using': 18, 'network': 7, 'many': 3, 'computers': 1, 'solve': 1, 'problems': 1, 'involving': 1, 'massive': 1, 'amounts': 1, 'data': 50, 'computation.': 1, 'It': 10, 'provides': 4, 'framework': 4, 'distributed': 8, 'storage': 13, 'processing': 8, 'big': 5, 'MapReduce': 35, 'programming': 7, 'model.': 2, 'Originally': 1, 'designed': 5, 'computer': 1, 'clusters': 11, 'built': 2, 'from': 21, 'commodity': 2, 'hardware[3]—still': 1, 'common': 2, 'use—it': 1, 'also': 14, 'found': 1, 'use': 8, 'higher-end': 1, 'hardware.[4][5]': 1, 'All': 1, 'modules': 2, 'are': 33, 'with': 39, 'fundamental': 1, 'assumption': 1, 'hardware': 5, 'failures': 2, 'occurrences': 1, 'should': 2, 'automatically': 1, 'handled': 1, 'by': 21, 'framework.[2]': 1, 'The': 48, 'core': 2, 'consists': 4, 'part,': 1, 'known': 2, 'as': 26, 'File': 18, 'System': 9, '(HDFS),': 1, 'part': 3, 'which': 20, 'splits': 1, 'files': 4, 'into': 10, 'blocks': 2, 'distributes': 1, 'them': 2, 'across': 8, 'nodes': 12, 'cluster.': 3, 'then': 3, 'transfers': 1, 'packaged': 1, 'code': 12, 'process': 3, 'parallel.': 1, 'approach': 3, 'takes': 4, 'advantage': 4, 'locality,[6]': 1, 'where': 5, 'manipulate': 1, 'they': 7, 'have': 7, 'access': 6, 'to.': 1, 'allows': 5, 'dataset': 1, 'processed': 1, 'faster': 1, 'more': 7, 'efficiently': 2, 'than': 4, 'would': 2, 'conventional': 1, 'supercomputer': 1, 'architecture': 2, 'relies': 1, 'parallel': 2, 'computation': 2, 'via': 3, 'high-speed': 1, 'networking.[7][8]': 1, 'base': 2, 'composed': 1, 'following': 1, 'modules:': 1, 'Common': 5, '–': 43, 'libraries': 1, 'needed': 2, 'other': 10, 'modules;': 1, '(HDFS)': 1, 'file-system': 5, 'stores': 4, 'machines,': 1, 'providing': 2, 'very': 5, 'high': 2, 'aggregate': 1, 'bandwidth': 1, 'cluster;': 1, 'YARN': 9, 'introduced': 2, '2012': 5, 'platform': 3, 'responsible': 1, 'managing': 1, 'computing': 2, 'resources': 4, 'scheduling': 4, "users'": 1, 'applications;[9][10]': 1, 'implementation': 5, 'model': 1, 'large-scale': 1, 'processing.': 2, 'term': 3, 'come': 1, 'refer': 1, 'not': 10, 'just': 1, 'aforementioned': 1, 'sub-modules,': 1, 'but': 4, 'ecosystem,[11]': 1, 'additional': 2, 'packages': 1, 'can': 26, 'installed': 1, 'top': 6, 'alongside': 1, 'Hadoop,': 5, 'such': 6, 'Pig,': 2, 'Hive,': 2, 'HBase,': 2, 'Phoenix,': 1, 'Spark,': 1, 'ZooKeeper,': 1, 'Cloudera': 6, 'Impala,': 1, 'Flume,': 1, 'Sqoop,': 1, 'Oozie,': 1, 'Storm.[12]': 1, "Hadoop's": 2, 'HDFS': 25, 'components': 2, 'were': 1, 'inspired': 2, 'Google': 29, 'papers': 2, 'their': 2, 'System.[13]': 1, 'itself': 2, 'mostly': 2, 'language,': 1, 'some': 7, 'native': 2, 'C': 1, 'command': 2, 'line': 2, 'shell': 3, 'scripts.': 1, 'Though': 1, 'common,': 1, 'any': 4, 'language': 1, 'used': 12, '"Hadoop': 19, 'Streaming"': 2, 'implement': 1, '"map"': 1, '"reduce"': 1, 'parts': 1, "user's": 1, 'program.[14]': 1, 'Other': 5, 'projects': 3, 'ecosystem': 2, 'expose': 1, 'richer': 1, 'user': 2, 'interfaces.': 1, 'Contents': 2, '1': 6, 'History': 3, '1.1': 1, 'Timeline': 2, '2': 6, 'Architecture': 2, '2.1': 1, 'systems': 11, '2.1.1': 1, '2.1.2': 1, '2.2': 2, 'JobTracker': 6, 'TaskTracker:': 2, 'engine': 3, '2.2.1': 1, 'Scheduling': 3, '2.2.1.1': 1, 'Fair': 4, 'scheduler': 13, '2.2.1.2': 1, 'Capacity': 3, '2.3': 2, 'Difference': 4, 'between': 9, 'vs': 4, '(YARN)': 2, '2.4': 2, '3': 7, '2.5': 2, 'applications': 6, 'Prominent': 2, 'cases': 2, '4': 4, 'hosting': 2, 'cloud': 7, '4.1': 1, 'On': 15, 'Microsoft': 4, 'Azure': 6, '4.2': 1, 'Amazon': 12, 'EC2/S3': 2, 'services': 4, '4.3': 1, 'Elastic': 12, '4.4': 1, 'CenturyLink': 6, 'Cloud': 40, '(CLC)': 2, '4.5': 1, 'Platform': 12, '4.6': 1, 'SAP': 5, '4.7': 1, 'Oracle': 4, '5': 2, 'Commercial': 2, 'support': 9, '5.1': 1, 'Branding': 2, 'Papers': 2, '7': 1, 'See': 3, '8': 3, 'References': 2, '9': 3, 'Bibliography': 2, '10': 4, 'External': 2, 'links': 4, 'According': 1, 'its': 11, 'co-founders,': 1, 'Doug': 6, 'Cutting': 2, 'Mike': 1, 'Cafarella,': 2, 'genesis': 1, 'was': 19, '"Google': 3, 'System"': 1, 'paper': 4, 'published': 3, 'October': 16, '2003.[15][16]': 1, 'spawned': 1, 'another': 1, 'one': 10, '"MapReduce:': 2, 'Simplified': 4, 'Data': 20, 'Processing': 4, 'Large': 4, 'Clusters".[17]': 1, 'Development': 2, 'started': 2, 'Nutch': 6, 'project,': 1, 'moved': 2, 'new': 5, 'subproject': 4, 'January': 11, '2006.[18]': 1, 'Cutting,': 4, 'who': 2, 'working': 3, 'Yahoo!': 9, 'time,': 1, 'named': 4, 'after': 3, 'his': 2, "son's": 3, 'toy': 3, 'elephant.[19]': 1, 'initial': 2, 'factored': 1, 'out': 7, 'consisted': 1, 'about': 3, '5,000': 1, 'lines': 3, '6,000': 1, 'MapReduce.': 2, 'first': 3, 'committer': 1, 'add': 3, 'project': 2, 'Owen': 3, "O'Malley": 2, '(in': 2, 'March': 9, '2006);[20]': 1, '0.1.0': 2, 'released': 6, 'April': 16, '2006.[21]': 1, 'continues': 1, 'evolve': 2, 'through': 5, 'contributions': 1, 'being': 1, 'made': 2, 'project.[22]': 1, 'Year': 1, 'Month': 1, 'Event': 1, 'Ref.': 1, '2003': 1, '[23]': 1, '2004': 1, 'MapReduce:': 2, 'Clusters': 1, '[24]': 1, '2006': 9, 'created': 1, 'mailing': 3, 'lists,': 1, 'jira,': 1, 'wiki': 1, '[25]': 1, 'born': 1, '197': 1, '[26]': 1, 'February': 9, 'NDFS+': 1, 'create': 3, '[27]': 11, "O'Malley's": 1, 'patch': 1, 'goes': 3, '[28]': 1, "Cutting's": 1, 'yellow': 1, 'plush': 1, '[29]': 2, '[30]': 1, 'sorts': 2, '1.8': 1, 'TB': 2, '188': 1, '47.9': 1, 'hours': 2, 'May': 9, 'Yahoo': 11, 'deploys': 2, '300': 1, 'machine': 4, 'cluster': 11, 'reaches': 1, '600': 1, 'machines': 4, '2007': 5, 'runs': 5, 'two': 5, '1,000': 1, 'June': 23, 'Only': 1, 'three': 3, 'companies': 3, '"Powered': 2, 'Page"': 2, '[31]': 2, 'First': 2, 'includes': 5, 'HBase': 5, '[32]': 1, 'Labs': 1, 'creates': 1, 'donates': 1, 'ASF': 4, '[33]': 1, '2008': 11, 'JIRA': 1, 'opened': 1, 'Yarn': 1, 'Jira': 1, '(Mapreduce': 1, '279)': 1, '20': 3, 'moves': 2, 'web': 5, 'index': 3, 'onto': 2, '[34]': 1, 'production': 4, 'generated': 1, '10,000-core': 1, 'Summit': 20, '[35]': 1, 'world': 2, 'record': 1, 'fastest': 1, 'sort': 3, 'terabyte': 4, 'data.': 4, 'Running': 1, '910-node': 1, 'cluster,': 5, 'sorted': 2, '209': 1, 'seconds': 3, 'wins': 2, 'TeraByte': 1, 'Sort': 4, '(World': 1, 'Record': 3, 'sortbenchmark.org)': 1, '[36]': 1, 'July': 6, 'Terabyte': 2, 'Benchmark': 1, '[37]': 1, 'Loading': 1, 'TB/day': 1, 'Cloudera,': 3, 'distributor': 2, 'founded': 2, '[38]': 1, 'November': 6, '68': 1, '2009': 8, '17': 4, '24,000': 1, 'petabyte': 1, '[39]': 1, '62': 1, 'Second': 2, '[40]': 1, 'Core': 1, 'renamed': 1, '[41]': 10, 'MapR,': 1, '[42]': 1, 'now': 3, 'separate': 5, '2010': 9, 'Kerberos': 2, 'added': 3, '[43]': 1, 'Graduates': 4, '[44]': 1, 'Third': 2, '[45]': 1, '4,000': 1, 'nodes/70': 1, 'petabytes': 4, '[46]': 2, 'Facebook': 3, '2,300': 1, 'clusters/40': 1, 'September': 4, 'Hive': 3, '[47]': 1, 'Pig': 1, '[48]': 1, '2011': 7, 'Zookeeper': 1, '[49]': 1, 'Facebook,': 1, 'LinkedIn,': 1, 'eBay': 1, 'IBM': 6, 'collectively': 1, 'contribute': 1, '200,000': 1, '[50]': 2, 'prize': 2, 'Media': 2, 'Guardian': 2, 'Innovation': 3, 'Awards': 1, '[51]': 1, 'Rob': 1, 'Beardon': 1, 'Eric': 1, 'Badleschieler': 1, 'spin': 2, 'Hortonworks': 6, 'Yahoo.': 6, '[52]': 2, '42K': 1, 'hundreds': 1, 'Annual': 2, '(1,700': 1, 'attendees)': 6, '[53]': 1, 'Debate': 1, 'over': 5, 'company': 1, 'had': 4, 'contributed': 1, 'Hadoop.': 2, 'community': 1, 'replace': 1, 'San': 3, 'Jose': 3, '(2,100': 1, '[54]': 1, '1.0': 1, 'Available': 11, '2013': 6, 'Amsterdam': 3, '(500': 1, '[55]': 1, 'deployed': 4, '[56]': 1, '(2,700': 1, '[57]': 1, '2014': 9, 'Spark': 7, 'Level': 1, 'Project': 3, '[58]': 1, 'summit': 2, '(750': 1, '[59]': 1, '(3,200': 1, '[60]': 1, 'August': 6, '2.6': 1, '2015': 2, 'Europe': 3, '[61]': 1, '2.7': 1, '2017': 3, '2.8': 1, '[62]': 1, '2.9': 1, '[63]': 1, '3.0': 1, '[64]': 1, '2018': 1, '3.1': 1, '[65]': 1, 'also:': 1, 'System,': 1, 'package,': 1, 'operating': 5, 'level': 2, 'abstractions,': 1, '(either': 1, 'MapReduce/MR1': 1, 'YARN/MR2)[66]': 1, '(HDFS).': 1, 'package': 1, 'ARchive': 1, '(JAR)': 1, 'scripts': 3, 'start': 1, 'For': 4, 'effective': 1, 'work,': 1, 'every': 3, 'Hadoop-compatible': 1, 'provide': 5, 'location': 1, 'awareness': 3, 'name': 2, 'rack': 2, '(or,': 1, 'precisely,': 1, 'switch)': 1, 'worker': 4, 'node': 12, 'is.': 1, 'information': 4, 'execute': 2, 'is,': 1, 'and,': 1, 'failing': 2, 'that,': 1, 'same': 4, 'rack/switch': 1, 'reduce': 7, 'backbone': 2, 'traffic.': 1, 'uses': 6, 'method': 1, 'replicating': 2, 'redundancy': 2, 'racks.': 1, 'reduces': 4, 'impact': 2, 'power': 1, 'outage': 1, 'switch': 1, 'failure;': 1, 'if': 4, 'occurs,': 1, 'will': 1, 'remain': 1, 'available.[67]': 1, 'A': 14, 'multi-node': 1, 'small': 4, 'single': 6, 'master': 3, 'nodes.': 4, 'Job': 3, 'Tracker,': 2, 'Task': 2, 'NameNode,': 3, 'DataNode.': 1, 'slave': 3, 'acts': 1, 'both': 2, 'DataNode': 2, 'TaskTracker,': 1, 'though': 2, 'possible': 3, 'data-only': 1, 'compute-only': 1, 'These': 4, 'normally': 1, 'only': 4, 'nonstandard': 1, 'applications.[68]': 1, 'requires': 2, 'Runtime': 1, 'Environment': 1, '(JRE)': 1, '1.6': 1, 'higher.': 1, 'standard': 1, 'startup': 1, 'shutdown': 1, 'require': 2, 'Secure': 2, 'Shell': 1, '(SSH)': 1, 'set': 1, 'up': 5, 'cluster.[69]': 1, 'In': 15, 'larger': 1, 'managed': 5, 'dedicated': 1, 'NameNode': 2, 'server': 3, 'host': 1, 'index,': 1, 'secondary': 4, 'generate': 1, 'snapshots': 2, "namenode's": 2, 'memory': 2, 'structures,': 1, 'thereby': 1, 'preventing': 1, 'corruption': 1, 'loss': 2, 'Similarly,': 1, 'standalone': 1, 'manage': 2, 'job': 11, 'When': 2, 'alternate': 2, 'system,': 5, 'replaced': 2, 'file-system-specific': 1, 'equivalents.': 1, 'distributed,': 1, 'scalable,': 1, 'portable': 1, 'framework.': 2, 'Some': 3, 'consider': 1, 'instead': 1, 'store': 3, 'due': 2, 'lack': 1, 'POSIX': 3, 'compliance,[70]': 1, 'does': 3, 'commands': 1, 'application': 8, 'interface': 1, '(API)': 1, 'methods': 1, 'similar': 2, 'systems.[71]': 1, 'nominally': 1, 'namenode': 9, 'plus': 1, 'datanodes,': 2, 'although': 1, 'options': 1, 'available': 7, 'criticality.': 1, 'Each': 2, 'datanode': 1, 'serves': 1, 'block': 1, 'protocol': 1, 'specific': 2, 'HDFS.': 1, 'TCP/IP': 1, 'sockets': 1, 'communication.': 1, 'Clients': 1, 'remote': 2, 'procedure': 1, 'calls': 1, '(RPC)': 1, 'communicate': 1, 'each': 3, 'other.': 1, '(typically': 1, 'range': 1, 'gigabytes': 1, 'terabytes[72])': 1, 'machines.': 1, 'achieves': 1, 'reliability': 2, 'hosts,': 1, 'hence': 2, 'theoretically': 1, 'redundant': 1, 'array': 1, 'independent': 1, 'disks': 1, '(RAID)': 1, 'hosts': 1, '(but': 1, 'increase': 1, 'input-output': 1, '(I/O)': 1, 'performance': 6, 'RAID': 1, 'configurations': 1, 'still': 1, 'useful).': 1, 'With': 3, 'default': 4, 'replication': 2, 'value,': 1, '3,': 2, 'stored': 1, 'nodes:': 1, 'rack,': 1, 'different': 2, 'rack.': 2, 'rebalance': 1, 'data,': 4, 'move': 2, 'copies': 1, 'around,': 1, 'keep': 2, 'high.': 1, 'fully': 2, 'POSIX-compliant,': 1, 'because': 1, 'requirements': 1, 'differ': 1, 'target': 1, 'goals': 1, 'application.': 2, 'trade-off': 1, 'having': 3, 'POSIX-compliant': 1, 'increased': 1, 'throughput': 1, 'non-POSIX': 1, 'operations': 2, 'Append.[73]': 1, 'high-availability': 1, 'capabilities,': 1, 'announced': 5, 'version': 6, '2012,[74]': 1, 'letting': 1, 'main': 3, 'metadata': 2, '(the': 1, 'NameNode)': 1, 'manually': 2, 'fail-over': 1, 'backup.': 1, 'developing': 1, 'automatic': 2, 'fail-overs.': 1, 'so-called': 1, 'namenode,': 1, 'misleading': 1, 'might': 1, 'incorrectly': 1, 'interpret': 1, 'backup': 1, 'primary': 4, 'offline.': 1, 'fact,': 1, 'regularly': 1, 'connects': 1, 'builds': 1, 'directory': 3, 'information,': 1, 'saves': 1, 'local': 1, 'directories.': 1, 'checkpointed': 1, 'images': 1, 'restart': 1, 'failed': 1, 'replay': 1, 'entire': 2, 'journal': 1, 'actions,': 1, 'edit': 1, 'log': 2, 'up-to-date': 1, 'structure.': 1, 'Because': 1, 'point': 2, 'management': 2, 'metadata,': 1, 'become': 3, 'bottleneck': 1, 'supporting': 1, 'huge': 2, 'files,': 1, 'especially': 2, 'files.': 1, 'Federation,': 1, 'addition,': 1, 'aims': 1, 'tackle': 1, 'problem': 1, 'certain': 1, 'extent': 1, 'allowing': 1, 'namespaces': 1, 'served': 1, 'namenodes.': 1, 'Moreover,': 1, 'there': 3, 'issues,': 1, 'scalability': 1, 'problems,': 1, 'Single': 1, 'Point': 1, 'Failure': 1, '(SPoF),': 1, 'bottlenecks': 1, 'requests.': 1, 'One': 2, 'tracker': 4, 'task': 4, 'tracker.': 1, 'schedules': 2, 'map': 5, 'jobs': 6, 'trackers': 1, 'location.': 1, 'example:': 1, '(a,': 2, 'b,': 2, 'c)': 2, 'X': 2, '(x,': 2, 'y,': 2, 'z),': 1, 'perform': 3, 'tasks': 2, 'scheduled': 1, 'z).': 1, 'amount': 2, 'traffic': 2, 'prevents': 1, 'unnecessary': 1, 'transfer.': 1, 'systems,': 1, 'always': 1, 'available.': 1, 'significant': 1, 'job-completion': 1, 'times': 3, 'demonstrated': 1, 'data-intensive': 1, 'jobs.[75]': 1, 'immutable': 1, 'suitable': 1, 'requiring': 1, 'concurrent': 1, 'write-operations.[73]': 1, 'mounted': 3, 'directly': 3, 'Filesystem': 2, 'Userspace': 1, '(FUSE)': 1, 'virtual': 2, 'Linux': 7, 'Unix': 1, 'systems.': 3, 'achieved': 1, 'API,': 1, 'Thrift': 1, 'API': 2, '(generates': 1, 'client': 3, 'languages': 1, 'e.g.': 2, 'C++,': 1, 'Java,': 1, 'Python,': 1, 'PHP,': 1, 'Ruby,': 1, 'Erlang,': 1, 'Perl,': 1, 'Haskell,': 1, 'C#,': 1, 'Cocoa,': 1, 'Smalltalk,': 1, 'OCaml),': 1, 'command-line': 1, 'interface,': 1, 'HDFS-UI': 1, 'HTTP,': 1, '3rd-party': 1, 'libraries.[76]': 1, 'portability': 2, 'various': 4, 'platforms': 2, 'compatibility': 1, 'variety': 1, 'underlying': 4, 'design': 1, 'introduces': 1, 'limitations': 2, 'result': 1, 'bottlenecks,': 1, 'since': 1, 'cannot': 2, 'features': 3, 'exclusive': 1, 'running.[77]': 1, 'Due': 1, 'widespread': 1, 'integration': 1, 'enterprise-level': 1, 'infrastructure,': 1, 'monitoring': 2, 'scale': 2, 'increasingly': 1, 'important': 2, 'issue.': 1, 'Monitoring': 1, 'end-to-end': 1, 'tracking': 2, 'metrics': 1, 'namenodes,': 1, 'system.[78]': 1, 'There': 8, 'currently': 3, 'several': 4, 'track': 1, 'performance,': 1, 'including': 4, 'HortonWorks,': 1, 'Datadog.': 1, 'works': 2, 'simply': 1, 'file://': 1, 'URL;': 1, 'however,': 2, 'comes': 2, 'price': 1, 'locality.': 1, 'To': 1, 'traffic,': 1, 'needs': 1, 'know': 1, 'servers': 1, 'closest': 1, 'Hadoop-specific': 1, 'bridges': 2, 'provide.': 1, '2011,': 2, 'list': 2, 'supported': 1, 'bundled': 1, 'were:': 1, 'HDFS:': 1, 'own': 2, 'rack-aware': 2, 'system.[79]': 1, 'tens': 1, 'FTP': 2, 'system:': 2, 'all': 3, 'remotely': 1, 'accessible': 1, 'servers.': 1, 'S3': 3, '(Simple': 1, 'Storage': 8, 'Service)': 1, 'object': 2, 'storage:': 2, 'targeted': 1, 'hosted': 2, 'Compute': 2, 'server-on-demand': 1, 'infrastructure.': 1, 'no': 4, 'rack-awareness': 1, 'remote.': 1, 'Windows': 1, 'Blobs': 1, '(WASB)': 1, 'extension': 1, 'distributions': 2, 'blob': 1, 'moving': 1, 'permanently': 1, 'third-party': 1, 'been': 1, 'written,': 1, 'none': 1, 'distributions.': 1, 'However,': 1, 'commercial': 3, 'ship': 1, 'alternative': 3, 'specifically': 1, 'MapR.': 1, '2009,': 3, 'discussed': 2, 'running': 4, 'General': 2, 'Parallel': 2, 'System.[80]': 1, 'source': 5, '2009.[81]': 1, '2010,': 4, 'Parascale': 3, 'run': 5, 'against': 1, 'system.[82]': 1, 'Appistry': 1, 'driver': 1, 'CloudIQ': 2, 'product.[83]': 1, 'HP': 1, 'location-aware': 1, 'IBRIX': 1, 'Fusion': 1, 'driver.[84]': 1, 'MapR': 3, 'Technologies': 1, 'Inc.': 3, 'availability': 1, 'FS,': 1, 'full': 1, 'random-access': 1, 'read/write': 1, 'system.': 2, 'Main': 3, 'article:': 1, 'Atop': 1, 'Engine,': 1, 'JobTracker,': 2, 'submit': 1, 'jobs.': 5, 'pushes': 1, 'work': 7, 'TaskTracker': 8, 'striving': 1, 'close': 1, 'possible.': 1, 'knows': 1, 'nearby.': 1, 'If': 3, 'actual': 2, 'resides,': 1, 'priority': 2, 'given': 2, 'network.': 1, 'fails': 1, 'out,': 1, 'rescheduled.': 1, 'spawns': 1, '(JVM)': 1, 'prevent': 1, 'crashes': 1, 'JVM.': 1, 'heartbeat': 1, 'sent': 1, 'few': 1, 'minutes': 1, 'check': 1, 'status.': 1, 'Tracker': 2, 'status': 1, 'exposed': 1, 'Jetty': 1, 'viewed': 1, 'browser.': 1, 'Known': 1, 'are:': 2, 'allocation': 1, 'TaskTrackers': 1, 'simple.': 1, 'Every': 3, 'slots': 1, '(such': 2, '"4': 1, 'slots").': 1, 'active': 1, 'slot.': 2, 'allocates': 2, 'nearest': 1, 'consideration': 1, 'current': 1, 'load': 1, 'allocated': 3, 'machine,': 1, 'availability.': 1, 'slow,': 1, 'delay': 1, 'towards': 1, 'end,': 1, 'everything': 1, 'end': 1, 'waiting': 1, 'slowest': 1, 'task.': 1, 'speculative': 1, 'execution': 2, 'enabled,': 1, 'executed': 1, 'By': 4, 'FIFO': 1, 'scheduling,': 1, 'optionally': 1, 'priorities': 1, 'schedule': 1, 'queue.[85]': 1, '0.19': 1, 'refactored': 1, 'while': 3, 'adding': 1, 'ability': 1, 'scheduler,': 1, 'described': 1, 'next).[86]': 1, 'fair': 4, 'developed': 3, 'Facebook.[87]': 1, 'goal': 1, 'fast': 1, 'response': 1, 'Quality': 1, 'service': 3, '(QoS)': 1, 'basic': 1, 'concepts.[88]': 1, 'Jobs': 1, 'grouped': 1, 'pools.': 1, 'pool': 1, 'assigned': 1, 'guaranteed': 1, 'minimum': 2, 'share.': 1, 'Excess': 1, 'capacity': 3, 'split': 3, 'default,': 1, 'uncategorized': 1, 'go': 1, 'pool.': 1, 'Pools': 1, 'specify': 1, 'slots,': 2, 'well': 2, 'limit': 1, 'supports': 3, 'those': 1, 'scheduler.[89]': 1, 'Queues': 1, 'fraction': 1, 'total': 2, 'resource': 4, 'capacity.': 2, 'Free': 3, 'queues': 1, 'beyond': 1, 'Within': 2, 'queue,': 1, "queue's": 1, 'resources.': 1, 'preemption': 1, 'once': 1, 'running.': 1, 'biggest': 2, 'difference': 1, 'involves': 1, 'technology.': 1, 'included': 1, 'Common,': 1, 'HDFS,': 1, 'MapReduce,': 1, 'second': 1, 'came': 1, 'technology': 1, 'called': 3, '-': 8, 'acronym': 1, '"Yet': 1, 'Another': 1, 'Resource': 2, 'Negotiator".': 1, 'YARN,': 1, 'open': 1, 'technology,': 1, 'strives': 1, 'allocate': 1, 'effectively.': 1, 'daemons,': 1, 'take': 3, 'care': 1, 'tasks:': 1, 'progress': 1, 'monitoring.': 1, 'daemons': 1, 'manager': 2, 'master.': 1, 'applications,': 2, 'monitors': 1, 'process.': 1, 'provided': 1, '3.': 1, 'example,': 2, '2,': 1, 'enables': 1, 'nodes,': 1, 'solves': 1, 'failure': 1, 'problem.': 1, 'containers': 1, 'principle': 1, 'Docker,': 1, 'time': 2, 'spent': 1, 'development.': 1, 'changes': 3, 'decreases': 1, 'overhead': 1, 'erasure': 1, 'coding.': 1, 'Also,': 1, 'permits': 1, 'usage': 1, 'GPU': 1, 'within': 3, 'substantial': 1, 'benefit': 1, 'Deep': 1, 'Learning': 1, 'algorithms': 1, 'cluster.[90]': 1, 'restricted': 1, 'under': 2, 'development': 1, 'Apache.': 1, 'database,': 1, 'Mahout': 1, 'learning': 2, 'Warehouse': 1, 'can,': 1, 'theory,': 1, 'batch-oriented': 1, 'rather': 1, 'real-time,': 1, 'data-intensive,': 1, 'benefits': 1, 'complement': 1, 'real-time': 1, 'lambda': 1, 'architecture,': 1, 'Storm,': 2, 'Flink': 2, 'Streaming.[91]': 1, 'As': 3, 'Hadoop[92]': 1, 'included:-': 1, 'and/or': 3, 'clickstream': 1, 'analysis': 1, 'kinds': 1, 'marketing': 1, 'analytics': 3, 'sophisticated': 1, 'mining': 1, 'image': 2, 'XML': 1, 'messages': 1, 'crawling': 1, 'text': 1, 'general': 1, 'archiving,': 1, 'relational/tabular': 1, 'compliance': 1, '19,': 1, '2008,': 1, 'launched': 1, 'what': 2, 'claimed': 2, "world's": 2, 'largest': 3, 'Search': 2, 'Webmap': 1, '10,000': 1, 'cores': 1, 'produced': 1, 'query.[93]': 1, 'centers.': 1, 'bootstraps': 1, 'image,': 1, 'distribution.': 1, 'Work': 1, 'include': 2, 'calculations': 1, 'engine.': 1, 'community.[94]': 1, '21': 3, 'PB': 2, 'storage.[95]': 1, '2012,': 1, 'grown': 1, '100': 2, 'PB[96]': 1, 'later': 2, 'year': 1, 'growing': 1, 'roughly': 1, 'half': 2, 'per': 1, 'day.[97]': 1, '2013,': 1, 'adoption': 1, 'widespread:': 1, 'Fortune': 1, '50': 1, 'Hadoop.[98]': 1, 'traditional': 3, 'onsite': 1, 'datacenter': 2, 'cloud.[99]': 1, 'organizations': 2, 'deploy': 1, 'need': 3, 'acquire': 1, 'setup': 1, 'expertise.[100]': 1, 'Vendors': 1, 'offer': 2, 'Microsoft,': 1, 'Amazon,': 1, ',': 1, 'IBM,[101]': 1, 'Google,': 1, 'SAP[102],': 1, 'Oracle.[103]': 1, 'HDInsight[104]': 1, 'Azure.': 1, 'HDInsight': 3, 'HDP': 1, 'jointly': 1, 'HDI': 2, 'Hortonworks.': 2, 'extensions': 1, '.NET': 1, 'addition': 1, 'Java).': 1, 'creation': 1, 'Ubuntu.[104]': 1, 'deploying': 1, 'cloud,': 1, 'want': 1, 'get': 1, 'charged': 1, 'compute': 1, 'used.[104]': 1, 'implementations': 2, 'on-premises': 1, 'backup,': 1, 'development/test,': 1, 'bursting': 1, 'scenarios.[104]': 1, 'Virtual': 1, 'Machines.': 1, '(EC2)': 1, 'Simple': 2, 'Service': 3, '(S3).[105]': 1, 'New': 4, 'York': 3, 'Times': 1, 'EC2': 3, 'instances': 2, 'raw': 1, 'TIFF': 1, '(stored': 1, 'S3)': 1, '11': 18, 'million': 1, 'finished': 1, 'PDFs': 1, 'space': 1, '24': 1, 'cost': 2, '$240': 1, '(not': 1, 'bandwidth).[106]': 1, 'releases,': 1, 'below': 1, 'expects': 1, 'filesystem.': 1, 'Specifically,': 1, 'rename()': 1, 'delete()': 1, 'directories': 1, 'atomic,': 1, 'proportional': 1, 'entries': 1, 'them.': 1, '(EMR)[107]': 1, 'Amazon.com': 2, '2009.': 3, 'Provisioning': 1, 'terminating': 1, 'jobs,': 1, 'handling': 1, 'transfer': 1, '(VM)': 1, '(Object': 1, 'Storage)': 1, 'automated': 1, 'warehouse': 1, 'services,': 1, 'offered': 1, 'MapReduce.[108]': 1, 'Support': 1, 'Spot': 4, 'Instances[109]': 1, '2011.[110]': 1, 'fault-tolerant': 1, 'failures,[111]': 1, 'recommended': 1, 'Instance': 1, 'Group': 1, 'spot': 1, 'lower': 1, 'maintaining': 1, 'availability.[112]': 1, 'Cloud[113]': 1, 'offers': 3, 'un-managed': 1, 'model.[114]': 1, 'CLC': 1, 'customers': 1, 'Blueprints,': 1, 'newest': 1, 'portfolio,': 1, 'Cassandra': 2, 'MongoDB': 1, 'solutions.[115]': 1, 'ways': 1, 'ranging': 1, 'self-managed': 1, 'Google-managed.[116]': 1, 'Dataproc:': 1, 'service[117]': 1, 'tools': 1, '(bdutil):': 1, 'clusters[118]': 1, 'third': 1, 'party': 1, 'distributions:': 1, 'Director': 1, 'Plugin': 1, 'Platform[119]': 1, 'bdutil': 2, 'HDP[120]': 1, 'MapR[121]': 1, 'connectors': 1, 'products': 2, 'connector': 2, 'BigQuery': 1, 'BigQuery.': 1, 'Big': 11, 'Services': 5, 'performance-driven': 1, 'robust': 1, 'Spark.': 1, 'Cloud.': 1, 'deployment': 1, 'models.': 1, 'SQL': 1, 'combined': 1, 'Oracle’s': 1, 'platform.': 1, 'Public': 1, 'Hybrid': 1, 'At': 1, 'Customer.': 1, 'Hadoop.[122]': 1, 'stated': 1, 'officially': 1, 'Distributions': 1, 'Hadoop.[123]': 1, 'naming': 1, 'derivative': 1, 'vendors': 1, '"compatible"': 1, 'somewhat': 1, 'controversial': 1, 'developer': 1, 'community.[124]': 1, 'influenced': 1, 'birth': 1, 'growth': 1, 'Jeffrey': 1, 'Dean,': 2, 'Sanjay': 1, 'Ghemawat': 1, '(2004)': 1, 'Clusters,': 1, 'Google.': 2, 'develop': 1, 'Map-Reduce': 1, 'He': 1, 'elephant.': 1, 'Michael': 2, 'Franklin,': 1, 'Alon': 1, 'Halevy,': 1, 'David': 1, 'Maier': 1, '(2005)': 1, 'Databases': 1, 'Dataspaces:': 1, 'Abstraction': 1, 'Information': 1, 'Management.': 1, 'authors': 1, 'highlight': 1, 'accept': 1, 'formats': 1, 'APIs': 2, 'based': 1, "system's": 1, 'understanding': 1, 'Fay': 1, 'Chang': 1, 'et': 2, 'al.': 2, '(2006)': 1, 'Bigtable:': 1, 'Structured': 1, 'Data,': 1, 'Robert': 1, 'Kallman': 1, '(2008)': 1, 'H-store:': 1, 'high-performance,': 1, 'transaction': 1, 'portal': 2, 'Accumulo': 2, 'Bigtable[125]': 1, 'column-oriented': 1, 'database': 2, 'CouchDB': 1, 'JSON': 1, 'documents,': 1, 'JavaScript': 1, 'queries,': 1, 'regular': 1, 'HTTP': 1, 'Intensive': 1, 'Computing': 6, 'HPCC': 1, 'LexisNexis': 1, 'Risk': 1, 'Solutions': 1, 'High': 1, 'Performance': 2, 'Cluster': 1, 'Hypertable': 1, 'Sector/Sphere': 1, 'Open': 1, 'Utility': 1, 'Management': 1, 'Releases".': 2, 'apache.org.': 4, 'Foundation.': 4, 'Retrieved': 108, '2014-12-06.': 1, '"Welcome': 3, 'Hadoop!".': 1, 'hadoop.apache.org.': 6, '2016-08-25.': 1, 'Judge,': 1, 'Peter': 1, '(2012-10-22).': 1, '"Doug': 1, 'Cutting:': 1, 'Is': 1, 'No': 1, 'Bubble".': 1, 'silicon.co.uk.': 1, '2018-03-11.': 3, 'Woodie,': 1, 'Alex': 1, '(2014-05-12).': 1, '"Why': 2, 'Power".': 1, 'datanami.com.': 1, 'Datanami.': 1, 'Hemsoth,': 1, 'Nicole': 1, '(2014-10-15).': 1, '"Cray': 1, 'Launches': 3, 'HPC': 3, 'Airspace".': 1, 'hpcwire.com.': 1, '"What': 1, '(HDFS)?".': 1, 'ibm.com.': 2, 'IBM.': 4, '2014-10-30.': 3, 'Malak,': 1, '(2014-09-19).': 1, '"Data': 1, 'Locality:': 1, 'vs.': 2, 'Spark".': 1, 'datascienceassn.org.': 1, 'Science': 2, 'Association.': 1, '"Characterization': 1, 'Optimization': 1, 'Memory-Resident': 1, 'Systems"': 1, '(pdf).': 1, 'IEEE.': 2, '2014.': 1, '"Resource': 1, '(Apache': 1, '2.5.1': 1, 'API)".': 1, '2014-09-12.': 1, '2014-09-30.': 2, 'Murthy,': 1, 'Arun': 1, '(2012-08-15).': 1, '"Apache': 9, 'Concepts': 1, 'Applications".': 1, 'hortonworks.com.': 2, '"Continuuity': 1, 'Raises': 1, '$10': 1, 'Million': 1, 'Series': 1, 'Round': 1, 'Ignite': 1, 'Application': 1, 'Ecosystem".': 1, 'finance.yahoo.com.': 1, 'Marketwired.': 1, '2012-11-14.': 1, '"Hadoop-related': 1, 'at".': 1, 'Hadoop.apache.org.': 6, '2013-10-17.': 10, 'Analytics:': 1, 'Discovering,': 1, 'Analyzing,': 1, 'Visualizing': 1, 'Presenting': 1, 'Data.': 1, 'John': 1, 'Wiley': 1, '&': 3, 'Sons.': 1, '2014-12-19.': 1, 'p.': 5, '300.': 1, 'ISBN': 6, '9781118876220.': 1, '2015-01-29.': 1, '"[nlpatumd]': 1, 'Adventures': 1, 'Perl".': 1, 'Mail-archive.com.': 1, '2010-05-02.': 1, '2013-04-05.': 1, 'Mike;': 1, 'Ben;': 1, 'Lorica,': 1, '(2016-03-31).': 1, '"The': 6, 'next': 1, 'Hadoop".': 6, "O'Reilly": 2, 'Media.': 2, '2017-10-12.': 1, 'Ghemawat,': 2, 'Sanjay;': 1, 'Gobioff,': 1, 'Howard;': 1, 'Leung,': 1, 'Shun-Tak.': 1, 'System".': 2, 'Jeffrey;': 2, 'Sanjay.': 1, 'Clusters".': 1, '(28': 1, 'Jan': 1, '2006).': 2, '"new': 1, 'lists': 2, 'request:': 2, 'hadoop".': 1, 'issues.apache.org.': 1, 'Lucene': 1, 'PMC': 1, 'voted': 1, 'sub-project': 1, 'Hadoop"': 2, 'Vance,': 1, 'Ashlee': 1, '(2009-03-17).': 1, '"Hadoop,': 1, 'Program,': 1, 'Finds': 1, 'Uses': 1, 'Beyond': 1, 'Search".': 1, 'Times.': 2, 'Archived': 3, 'original': 3, '30,': 1, '2011.': 3, '2010-01-20.': 1, '(30': 1, '"[RESULT]': 1, 'VOTE:': 1, 'committer".': 1, 'hadoop-common-dev': 1, '(Mailing': 1, 'list).': 1, '"Index': 2, '/dist/hadoop/core".': 2, 'archive.apache.org.': 1, '2017.': 18, '"Who': 1, 'We': 1, 'Are".': 1, 'Research': 2, 'Publication:': 2, '2016-03-09.': 33, 'MapReduce".': 1, '"[INFRA-700]': 1, 'hadoop': 1, 'JIRA".': 3, '"[HADOOP-1]': 1, 'import': 1, 'White,': 2, 'Tom': 2, '(2012).': 1, 'Hadoop:': 4, 'Definitive': 2, 'Guide': 3, '(3rd': 1, 'ed.).': 5, "O'Reilly.": 2, '9781449328917.': 1, '"[NUTCH-197]': 1, 'NullPointerException': 1, 'TaskRunner': 1, 'jar': 1, '"lib"': 1, '"From': 1, 'Spiders': 1, 'Elephants:': 1, '2009".': 1, 'Riccomini.name.': 1, 'Gates,': 1, 'Alan': 1, '(2011).': 1, 'Programming': 1, 'Pig.': 1, '10.': 1, '978-1-4493-0264-1.': 1, '"Yahoo!': 3, "World's": 2, 'Largest': 2, 'Production': 2, 'Application".': 2, 'hadoopnew': 3, '2013-05-26.': 1, '"RE:': 1, 'workshop': 1, 'Yahoo!".': 2, '"TeraByte': 1, '(PDF).': 9, 'Sortbenchmark.org.': 2, 'Wins': 1, 'Benchmark".': 1, 'Developer.yahoo.com.': 1, '"Cloudera".': 1, 'Crunchbase.com.': 2, '"Winning': 1, '60': 1, 'Dash': 1, 'Yellow': 1, 'Elephant"': 1, '"Events': 1, 'Media"': 1, 'Mollynix.com.': 1, 'Apache™': 2, 'Hadoop®!".': 1, '"MapR': 2, 'Technologies".': 1, 'Updates': 1, '2010".': 2, 'Think': 1, 'Analytics.': 1, '25,': 1, '2016.': 6, '"Baldeschwieler': 1, 'beta': 1, 'test': 1, 'Security,': 1, 'authentication': 1, 'colocation': 1, 'business': 1, 'sensitive': 1, 'cluster."': 1, 'HBase™': 1, 'Home".': 2, 'Agenda': 1, 'available!".': 1, 'TM".': 1, 'Pig!".': 1, 'ZooKeeper': 1, '"Reality': 1, 'Check:': 1, 'Contributions': 2, '—': 1, 'Hortonworks".': 3, 'Awards".': 1, 'Guardian.': 1, 'Harris,': 1, 'Derrick.': 1, 'history': 2, 'future': 1, 'data".': 1, 'Gigaom.': 1, '2011:': 1, '29th,': 1, 'Santa': 1, 'Clara': 1, 'Convention': 1, 'Center".': 1, '"Fifth': 1, 'Kicks': 1, 'Off': 1, 'Attendance': 1, "It's": 1, 'Wrap!': 1, 'Yahoo!:': 1, 'More': 1, 'Than': 1, 'Ever': 1, 'Before".': 1, 'North': 1, 'America': 1, 'Draws': 1, 'Ecosystem': 1, 'Support".': 1, 'Business': 2, 'Wire.': 1, 'Announces': 1, 'Spark™': 1, 'Top-Level': 1, ':': 1, 'Blog".': 1, '"Loved': 1, 'Hope': 1, 'you': 2, 'did': 1, 'too!': 1, 'HANA".': 1, 'Keeps': 1, 'Getting': 1, 'Bigger".': 1, 'Pentaho.': 1, '2015,': 1, '15th–16th': 1, '2015".': 1, 'Lanyrd.': 1, '2.8.0".': 1, '2018-08-18.': 4, '2.9.0".': 1, '3.0.0".': 1, '3.1.0".': 1, 'Chouraria,': 1, 'Harsh': 1, '(21': 1, '2012).': 1, '"MR2': 1, 'Briefly': 1, 'Explained".': 1, 'Cloudera.com.': 1, '23': 1, '2013.': 5, '"HDFS': 5, 'User': 2, 'Guide".': 3, '2014-09-04.': 1, '"Running': 2, 'Ubuntu': 2, 'System(Multi-Node': 1, 'Cluster)".': 2, '(Single-Node': 1, 'Evans,': 1, 'Chris': 1, '(Oct': 1, '2013).': 1, '"Big': 1, 'basics".': 1, 'computerweekly.com.': 1, 'Computer': 2, 'Weekly.': 1, 'sense': 1, "isn't": 1, 'usually': 1, 'view"': 1, 'deRoos,': 1, 'Dirk.': 1, '"Managing': 1, 'Files': 1, 'Commands".': 1, 'dummies.com.': 1, 'Dummies.': 1, 'Architecture".': 1, 'Pessach,': 1, 'Yaniv': 1, '(2013).': 1, '"Distributed': 1, 'Storage"': 1, '(Distributed': 1, 'Storage:': 1, 'Concepts,': 1, 'Algorithms,': 1, 'Implementations': 1, '"Version': 1, 'manual': 1, 'failover': 1, 'failover:".': 1, '30': 1, '"Improving': 1, 'placement': 1, 'heterogeneous': 1, 'Clusters"': 1, 'Eng.auburn.ed.': 1, '2010.': 2, '"Mounting': 1, 'HDFS".': 1, '2016-08-05.': 1, 'Shafer,': 1, 'Rixner,': 1, 'Scott;': 1, 'Cox,': 1, 'Alan.': 1, 'Filesystem:': 1, 'Balancing': 1, 'Portability': 1, 'Performance"': 1, 'Rice': 1, 'University.': 1, '2016-09-19.': 1, 'Mouzakitis,': 1, 'Evan.': 1, '"How': 2, 'Collect': 1, 'Metrics".': 1, '2016-10-24.': 1, 'Users': 1, 'Rack': 1, 'Awareness".': 1, '"Cloud': 3, 'analytics:': 1, 'Do': 1, 'we': 1, 'really': 1, 'reinvent': 1, 'stack?"': 1, '"HADOOP-6330:': 1, 'Integrating': 1, 'interface".': 1, '2009-10-23.': 1, '"HADOOP-6704:': 1, 'filesystem".': 1, 'Parascale.': 1, '2010-04-14.': 1, 'Storage".': 1, 'Appistry,Inc.': 1, '2010-07-06.': 1, '"High': 1, 'Availability': 1, 'HP.': 1, '2010-06-09.': 1, '"Commands': 1, 'Web.archive.org.': 1, '"Refactor': 1, 'JobTracker".': 1, 'Common.': 1, '2012.': 2, 'Jones,': 1, 'M.': 1, 'Tim': 1, '(6': 1, '2011).': 1, '"Scheduling': 1, 'Scheduler': 1, 'Design': 1, 'Document"': 1, '12': 1, '"CapacityScheduler': 1, '31': 2, '2015.': 4, 'Adds': 1, 'Value': 1, 'Over': 1, '2".': 1, '2018-06-11.': 1, '"Benchmarking': 1, 'Streaming': 1, 'Computation': 1, 'Engines:': 1, '""How': 1, '30+': 1, 'enterprises': 1, 'Hadoop",': 1, 'DBMS2".': 1, 'Dbms2.com.': 1, '19': 1, '2008.': 1, 'Yahoo!.': 1, '2011-04-20.': 1, '"HDFS:': 1, 'cluster!".': 1, 'Hadoopblog.blogspot.com.': 1, '2010-05-09.': 1, '2012-05-23.': 1, '"Under': 2, 'Hood:': 2, 'Namenode': 1, 'Avatarnode".': 1, 'Facebook.': 2, '2012-09-13.': 1, 'Corona".': 1, '2012-11-09.': 1, '"Altior\'s': 1, 'AltraSTAR': 1, 'Accelerator': 1, 'Optimizer': 1, 'Now': 2, 'Certified': 2, 'CDH4': 1, "(Cloudera's": 1, 'Distribution': 1, 'Including': 1, 'Version': 1, '4)"': 1, '(Press': 1, 'release).': 1, 'Eatontown,': 1, 'NJ:': 1, 'Altior': 1, '2012-12-18.': 1, '2013-10-30.': 1, 'Azure".': 1, 'azure.microsoft.com.': 1, '"Hadoop".': 1, 'Azure.microsoft.com.': 2, '2014-07-22.': 4, '"IBM': 1, 'BigInsights': 1, 'Cloud".': 3, '03.ibm.com.': 1, '"SAP': 1, '|': 4, 'Platform".': 6, 'cloudplatform.sap.com.': 1, '2018-08-07.': 1, '"Oracle\'s': 1, 'comprises': 1, 'tools".': 1, '"HDInsight': 1, 'Varia,': 1, 'Jinesh': 1, '(@jinman).': 1, '"Taking': 1, 'Massive': 1, 'Man': 1, 'EC2/S3".': 1, 'Web': 1, 'Blog.': 1, 'Amazon.com.': 3, 'Gottfrid,': 1, 'Derek': 1, '(1': 1, '2007).': 1, '"Self-service,': 1, 'Prorated': 1, 'Super': 1, 'Fun!".': 1, '"AWS': 1, '(EMR)': 1, 'Aws.amazon.com.': 2, '"Amazon': 4, 'Developer': 1, 'Guide"': 1, 'Instances".': 2, 'Supports': 1, '2011-08-18.': 1, 'FAQs".': 1, 'Using': 1, 'Instances': 1, 'EMR': 1, 'YouTube': 1, 'Managed': 2, 'Ctl.io.': 3, '"Managed': 1, 'Cloudera".': 2, 'Simplified:': 1, 'Documentation': 2, 'Platform.': 3, 'Dataproc': 2, 'Cloud-native': 1, '"Quickstarts': 1, '"Cloudera': 1, 'VISION".': 1, 'Vision.cloudera.com.': 1, '"HDP': 1, 'Hortonworks.com.': 1, '22': 1, 'Mapr.com.': 1, 'Pace': 1, 'Has': 1, 'Pick': 1, 'Up".': 1, 'Gigaom.com.': 1, '2011-04-25.': 1, '"Defining': 2, 'Wiki.apache.org.': 1, '2013-03-30.': 1, 'Compatibility:': 1, 'revisited".': 1, 'Mail-archives.apache.org.': 1, '2011-05-10.': 1, 'Manual:': 1, 'Security".': 1, '2014-12-03.': 1, 'Lam,': 1, 'Chuck': 1, '(July': 1, '28,': 1, '2010).': 1, 'Action': 1, '(1st': 3, 'Manning': 1, 'Publications.': 1, '325.': 1, '1-935-18219-6.': 1, 'Venner,': 1, 'Jason': 1, '(June': 2, '22,': 1, '2009).': 2, 'Pro': 1, 'Apress.': 1, '440.': 1, '1-430-21942-4.': 1, '16,': 1, '524.': 1, '0-596-52197-9.': 1, 'Official': 1, 'website': 1, 'popular': 1, 'GitHub': 1, 'Introducing': 1, 'Modern': 1, 'lecture': 1, 'Stanford': 1, 'University': 1, 'Co-Founder': 1, 'CTO': 1, 'Amr': 1, 'Awadallah': 1, '(video': 1, 'archive)': 1, '(YouTube)': 1, 'Philip': 1, 'Zeyliger,': 1, 'Engineering': 1, 'Radio,': 1, 'IEEE': 1, 'Society,': 1, 'Key': 1, 'Role': 1, 'Plays': 1, 'Intelligence': 1, 'Warehousing': 1, 'vte': 2, 'Authority': 1, 'control': 1, 'GND:': 1, '1022420135': 1, 'Categories:': 1, 'FoundationBig': 1, 'productsCloud': 1, 'infrastructureDistributed': 1, 'systemsFree': 1, 'computingFree': 1, 'programmed': 1, '(programming': 1, 'language)Free': 1, 'softwareHadoopSoftware': 1, 'license': 1, 'Navigation': 1, 'menu': 1, 'Not': 1, 'logged': 1, 'Talk': 2, 'Create': 2, 'account': 1, 'Log': 1, 'Article': 1, 'Read': 1, 'View': 1, 'page': 4, 'Featured': 1, 'Current': 1, 'events': 1, 'Random': 1, 'Donate': 1, 'Wikipedia': 5, 'Interaction': 1, 'Help': 1, 'About': 2, 'Community': 1, 'Recent': 1, 'Contact': 2, 'Tools': 1, 'What': 1, 'here': 1, 'Related': 1, 'Upload': 1, 'Special': 1, 'pages': 1, 'Permanent': 1, 'link': 1, 'Page': 1, 'item': 1, 'Cite': 1, 'Print/export': 1, 'book': 1, 'Download': 1, 'PDF': 1, 'Printable': 1, 'Wikimedia': 2, 'Commons': 2, 'Languages': 1, 'العربية': 1, 'Español': 1, 'മലയാളം': 1, 'Português': 1, 'Русский': 1, 'தமிழ்': 1, 'తెలుగు': 1, 'اردو': 1, '中文': 1, 'last': 1, 'edited': 1, '2018,': 1, '23:17': 1, '(UTC).': 1, 'Text': 1, 'Creative': 1, 'Attribution-ShareAlike': 1, 'License;': 1, 'terms': 1, 'apply.': 1, 'site,': 1, 'agree': 1, 'Terms': 1, 'Use': 1, 'Privacy': 2, 'Policy.': 1, 'Wikipedia®': 1, 'registered': 1, 'trademark': 1, 'Foundation,': 1, 'Inc.,': 1, 'non-profit': 1, 'organization.': 1, 'policy': 1, 'Disclaimers': 1, 'Developers': 1, 'Cookie': 1, 'statement': 1, 'Mobile': 1, 'view': 1}