### Korean font setup
import matplotlib
from matplotlib import font_manager, rc
import matplotlib.pyplot as plt
import platform
path = "C:/Windows/Fonts/malgun.ttf"
if platform.system() == "Windows":
    font_name = font_manager.FontProperties(fname=path).get_name()
    rc('font', family=font_name)
elif platform.system() == "Darwin":
    rc('font', family='AppleGothic')
else:
    print("Unknown System")
import tensorflow as tf
import keras
print(tf.__version__)
print(keras.__version__)
2.9.1
2.9.0
from keras.datasets import imdb
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)
print(train_data.shape, train_labels.shape)
print(test_data.shape, test_labels.shape)
(25000,) (25000,)
(25000,) (25000,)
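Note that train_data is a 1-D object array: each element is a plain Python list of word indices, so reviews can have different lengths. A quick check (a sketch, not from the original run):
print(train_data.dtype)                        # object: each element is a Python list
print(len(train_data[0]), len(train_data[1]))  # variable lengths: 218 and 189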
# Inspect one element of train_data and a few of its entries
print(type(train_data[0]), len(train_data[0])) # type and length
print("Number of words in one review : ", len(train_data[0]))
train_data[0][0:15]
<class 'list'> 218
Number of words in one review :  218
[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4]
# Check the first 5 labels (1 = positive, 0 = negative)
train_labels[0:5]
array([1, 0, 0, 1, 0], dtype=int64)
[max(sequence) for sequence in train_data][0:10] # maximum word index within each of the first 10 reviews
[7486, 9837, 6905, 9941, 7224, 7982, 9363, 9820, 7612, 8419]
max([max(sequence) for sequence in train_data])
9999
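The maximum of 9999 confirms that num_words=10000 keeps only the 10,000 most frequent words. A sanity check along those lines (a sketch):
# with num_words=10000, no index in the data should reach 10000
assert max(max(sequence) for sequence in train_data) < 10000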
# word_index is a dictionary mapping words to integer indices
word_index = imdb.get_word_index()
# total number of word:index pairs
print( len(word_index) ) # 88,584 entries
list_word_index = list([ (value, key) for (key, value) in word_index.items() ])
list_word_index[0:10]
88584
[(34701, 'fawn'), (52006, 'tsukino'), (52007, 'nunnery'), (16816, 'sonja'), (63951, 'vani'), (1408, 'woods'), (16115, 'spiders'), (2345, 'hanging'), (2289, 'woody'), (52008, 'trawling')]
# Flip the pairs so that integer indices map to words
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
reverse_word_index
{34701: 'fawn', 52006: 'tsukino', 52007: 'nunnery', 16816: 'sonja', 63951: 'vani', 1408: 'woods', 16115: 'spiders', 2345: 'hanging', 2289: 'woody', 52008: 'trawling', ...}
(output truncated: one entry per word in the 88,584-word vocabulary)
print(type(reverse_word_index))
dir(reverse_word_index)[-11:] # check the dict's methods
<class 'dict'>
['clear', 'copy', 'fromkeys', 'get', 'items', 'keys', 'pop', 'popitem', 'setdefault', 'update', 'values']
# reverse_word_index.get(index)       # returns the word at that index
# reverse_word_index.get(index, '?')  # same, but returns '?' if the index has no word
for i in range(0, 50, 1):
    print( reverse_word_index.get(i, '?'), end= " " )
print("train 데이터 첫번째 3단어 살펴보기")
print("index 14 : ", reverse_word_index.get(14-3, '?'))
print("index 22 : ", reverse_word_index.get(22-3, '?'))
print("index 16 : ", reverse_word_index.get(16-3, '?'))
? the and a of to is br in it i this that was as for with movie but film on not you are his have he be one all at by an they who so from like her or just about it's out has if some there what good train 데이터 첫번째 3단어 살펴보기 index 14 : this index 22 : film index 16 : was
print( train_data[0] ) # the first review (as integer indices)
[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]
# train_data[0] is one review (1, 14, 22, 16, ...) => 218 words
# [reverse_word_index.get(i - 3, '?') for i in train_data[0]]
print(len( train_data[0]) ) # the first review consists of 218 indices (words)
print([i for i in train_data[0]]) # loop over the indices of train_data[0] and collect them into a list
218
[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]
# The second review: 189 indices (words); build the list and print it
print(len( train_data[1] )) # the second review consists of 189 indices (words)
print([i for i in train_data[1] ]) # loop over the indices of train_data[1] and collect them into a list
189
[1, 194, 1153, 194, 8255, 78, 228, 5, 6, 1463, 4369, 5012, 134, 26, 4, 715, 8, 118, 1634, 14, 394, 20, 13, 119, 954, 189, 102, 5, 207, 110, 3103, 21, 14, 69, 188, 8, 30, 23, 7, 4, 249, 126, 93, 4, 114, 9, 2300, 1523, 5, 647, 4, 116, 9, 35, 8163, 4, 229, 9, 340, 1322, 4, 118, 9, 4, 130, 4901, 19, 4, 1002, 5, 89, 29, 952, 46, 37, 4, 455, 9, 45, 43, 38, 1543, 1905, 398, 4, 1649, 26, 6853, 5, 163, 11, 3215, 2, 4, 1153, 9, 194, 775, 7, 8255, 2, 349, 2637, 148, 605, 2, 8003, 15, 123, 125, 68, 2, 6853, 15, 349, 165, 4362, 98, 5, 4, 228, 9, 43, 2, 1157, 15, 299, 120, 5, 120, 174, 11, 220, 175, 136, 50, 9, 4373, 228, 8255, 5, 2, 656, 245, 2350, 5, 4, 9837, 131, 152, 491, 18, 2, 32, 7464, 1212, 14, 9, 6, 371, 78, 22, 625, 64, 1382, 9, 8, 168, 145, 23, 4, 1690, 15, 16, 4, 1355, 5, 28, 6, 52, 154, 462, 33, 89, 78, 285, 16, 145, 95]
# [reverse_word_index.get(i - 3, '?') for i in train_data[0]]
# meaning: map each of the 218 indices to its word, collected into a list
print("The first review with each index mapped to its word")
print([reverse_word_index.get(i - 3, '?') for i in train_data[0]])
# ' '.join([reverse_word_index.get(i - 3, '?') for i in train_data[0]])
# joins the individual words into one string, inserting a space between them
The first review with each index mapped to its word
['?', 'this', 'film', 'was', 'just', 'brilliant', 'casting', 'location', 'scenery', 'story', 'direction', "everyone's", 'really', 'suited', 'the', 'part', 'they', 'played', 'and', 'you', 'could', 'just', 'imagine', 'being', 'there', 'robert', '?', 'is', 'an', 'amazing', 'actor', 'and', 'now', 'the', 'same', 'being', 'director', '?', 'father', 'came', 'from', 'the', 'same', 'scottish', 'island', 'as', 'myself', 'so', 'i', 'loved', 'the', 'fact', 'there', 'was', 'a', 'real', 'connection', 'with', 'this', 'film', 'the', 'witty', 'remarks', 'throughout', 'the', 'film', 'were', 'great', 'it', 'was', 'just', 'brilliant', 'so', 'much', 'that', 'i', 'bought', 'the', 'film', 'as', 'soon', 'as', 'it', 'was', 'released', 'for', '?', 'and', 'would', 'recommend', 'it', 'to', 'everyone', 'to', 'watch', 'and', 'the', 'fly', 'fishing', 'was', 'amazing', 'really', 'cried', 'at', 'the', 'end', 'it', 'was', 'so', 'sad', 'and', 'you', 'know', 'what', 'they', 'say', 'if', 'you', 'cry', 'at', 'a', 'film', 'it', 'must', 'have', 'been', 'good', 'and', 'this', 'definitely', 'was', 'also', '?', 'to', 'the', 'two', 'little', "boy's", 'that', 'played', 'the', '?', 'of', 'norman', 'and', 'paul', 'they', 'were', 'just', 'brilliant', 'children', 'are', 'often', 'left', 'out', 'of', 'the', '?', 'list', 'i', 'think', 'because', 'the', 'stars', 'that', 'play', 'them', 'all', 'grown', 'up', 'are', 'such', 'a', 'big', 'profile', 'for', 'the', 'whole', 'film', 'but', 'these', 'children', 'are', 'amazing', 'and', 'should', 'be', 'praised', 'for', 'what', 'they', 'have', 'done', "don't", 'you', 'think', 'the', 'whole', 'story', 'was', 'so', 'lovely', 'because', 'it', 'was', 'true', 'and', 'was', "someone's", 'life', 'after', 'all', 'that', 'was', 'shared', 'with', 'us', 'all']
# Indices 0, 1, and 2 are reserved for 'padding', 'start of sequence', and 'unknown', so subtract 3
decoded_review = ' '.join([reverse_word_index.get(i - 3, '?') for i in train_data[0]])
decoded_review
"? this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert ? is an amazing actor and now the same being director ? father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for ? and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also ? to the two little boy's that played the ? of norman and paul they were just brilliant children are often left out of the ? list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for what they have done don't you think the whole story was so lovely because it was true and was someone's life after all that was shared with us all"
### One-hot encoding
(Illustration from the original notes: one-hot vectors sketched for "my first review", "my second review", and "my third review", plus an alternative representation of the third review.)
import numpy as np
def vectorize_sequences(sequences, dimension=10000):
    # create an all-zero matrix of shape (len(sequences), dimension)
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.  # set the positions listed in sequence to 1 in results[i]
    return results
print("변환 전 : ", train_data.shape)
X_train = vectorize_sequences(train_data)
print("변환 후 : ", X_train.shape)
X_train
변환 전 : (25000,) 변환 후 : (25000, 10000)
array([[0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       ...,
       [0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.]])
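A toy call makes the multi-hot behaviour explicit: order is discarded and a repeated index is still set to 1 only once (a sketch with a hypothetical 5-dimensional vocabulary):
demo = vectorize_sequences([[0, 3, 3]], dimension=5)
print(demo)  # [[1. 0. 0. 1. 0.]] -- index 3 occurs twice but produces a single 1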
print("변환 전 : ",test_data.shape)
X_test = vectorize_sequences(test_data)
print("변환 후 : ",X_test.shape)
X_test
변환 전 : (25000,) 변환 후 : (25000, 10000)
array([[0., 1., 1., ..., 0., 0., 0.], [0., 1., 1., ..., 0., 0., 0.], [0., 1., 1., ..., 0., 0., 0.], ..., [0., 1., 1., ..., 0., 0., 0.], [0., 1., 1., ..., 0., 0., 0.], [0., 1., 1., ..., 0., 0., 0.]])
a = [1,2]
np.asarray(a)
array([1, 2])
print(type(train_labels), type(test_labels))
print(train_labels.shape, test_labels.shape)
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(25000,) (25000,)
y_train = np.asarray(train_labels).astype('float32')
y_test = np.asarray(test_labels).astype('float32')
print(type(y_train), type(y_test))
y_train.shape, y_test.shape
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
((25000,), (25000,))
y_train
array([1., 0., 0., ..., 0., 1., 0.], dtype=float32)
y_test
array([0., 1., 1., ..., 0., 0., 0.], dtype=float32)
from keras import models
from keras import layers
model = models.Sequential()
model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])
from tensorflow.keras import optimizers
model.compile(optimizer=optimizers.RMSprop(learning_rate=0.01),
              loss='binary_crossentropy',
              metrics=['accuracy'])
from keras import losses
from keras import metrics
model.compile(optimizer=optimizers.RMSprop(learning_rate=0.001),  # 'lr' is a deprecated alias for 'learning_rate'
              loss=losses.binary_crossentropy,
              metrics=[metrics.binary_accuracy])
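Only the most recent compile() call takes effect, so the three variants above are alternative spellings of the same configuration. The architecture can be inspected with model.summary(); the parameter count in the comment is computed from the layer sizes, not taken from the original output:
model.summary()
# expected trainable parameters:
#   first Dense: 10000*16 + 16 = 160016; second: 16*16 + 16 = 272; output: 16 + 1 = 17
#   total: 160,305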
X_train.shape, y_train.shape
((25000, 10000), (25000,))
X_val = X_train[:10000]            # held-out validation set
partial_X_train = X_train[10000:]  # training set: samples 10000-24999
y_val = y_train[:10000]            # held-out validation labels
partial_y_train = y_train[10000:]  # training labels: samples 10000-24999
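As an alternative to slicing by hand, fit() can carve out the validation set itself; note that validation_split takes the last fraction of the samples, so it is not identical to the manual split above (a sketch, left commented out):
# history = model.fit(X_train, y_train, epochs=20, batch_size=512,
#                     validation_split=0.4)  # 10,000 of 25,000 samples, taken from the end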
history = model.fit(partial_X_train,
                    partial_y_train,
                    epochs=20,
                    batch_size=512,
                    validation_data=(X_val, y_val))
Epoch 1/20
30/30 [==============================] - 2s 33ms/step - loss: 0.6185 - accuracy: 0.7185 - val_loss: 0.5058 - val_accuracy: 0.7568
Epoch 2/20
30/30 [==============================] - 1s 22ms/step - loss: 0.3194 - accuracy: 0.8754 - val_loss: 0.3907 - val_accuracy: 0.8449
Epoch 3/20
30/30 [==============================] - 1s 22ms/step - loss: 0.2129 - accuracy: 0.9161 - val_loss: 0.4216 - val_accuracy: 0.8510
Epoch 4/20
30/30 [==============================] - 1s 22ms/step - loss: 0.1684 - accuracy: 0.9364 - val_loss: 0.4502 - val_accuracy: 0.8439
Epoch 5/20
30/30 [==============================] - 1s 22ms/step - loss: 0.1206 - accuracy: 0.9571 - val_loss: 0.3564 - val_accuracy: 0.8818
Epoch 6/20
30/30 [==============================] - 1s 22ms/step - loss: 0.0937 - accuracy: 0.9696 - val_loss: 0.5479 - val_accuracy: 0.8514
Epoch 7/20
30/30 [==============================] - 1s 22ms/step - loss: 0.1148 - accuracy: 0.9612 - val_loss: 0.3717 - val_accuracy: 0.8778
Epoch 8/20
30/30 [==============================] - 1s 23ms/step - loss: 0.0657 - accuracy: 0.9793 - val_loss: 0.4201 - val_accuracy: 0.8771
Epoch 9/20
30/30 [==============================] - 1s 23ms/step - loss: 0.0197 - accuracy: 0.9941 - val_loss: 0.6530 - val_accuracy: 0.8678
Epoch 10/20
30/30 [==============================] - 1s 22ms/step - loss: 0.0907 - accuracy: 0.9826 - val_loss: 0.6377 - val_accuracy: 0.8759
Epoch 11/20
30/30 [==============================] - 1s 23ms/step - loss: 0.0054 - accuracy: 0.9993 - val_loss: 0.7571 - val_accuracy: 0.8730
Epoch 12/20
30/30 [==============================] - 1s 22ms/step - loss: 0.0912 - accuracy: 0.9839 - val_loss: 0.7324 - val_accuracy: 0.8717
Epoch 13/20
30/30 [==============================] - 1s 22ms/step - loss: 0.0027 - accuracy: 0.9998 - val_loss: 0.8392 - val_accuracy: 0.8735
Epoch 14/20
30/30 [==============================] - 1s 22ms/step - loss: 0.0015 - accuracy: 0.9997 - val_loss: 0.9948 - val_accuracy: 0.8704
Epoch 15/20
30/30 [==============================] - 1s 22ms/step - loss: 0.1047 - accuracy: 0.9872 - val_loss: 0.9747 - val_accuracy: 0.8626
Epoch 16/20
30/30 [==============================] - 1s 22ms/step - loss: 0.0015 - accuracy: 0.9999 - val_loss: 1.0148 - val_accuracy: 0.8699
Epoch 17/20
30/30 [==============================] - 1s 22ms/step - loss: 4.2119e-04 - accuracy: 0.9999 - val_loss: 1.2154 - val_accuracy: 0.8697
Epoch 18/20
30/30 [==============================] - 1s 23ms/step - loss: 2.1195e-04 - accuracy: 0.9999 - val_loss: 1.3966 - val_accuracy: 0.8689
Epoch 19/20
30/30 [==============================] - 1s 22ms/step - loss: 1.5039e-04 - accuracy: 0.9999 - val_loss: 1.5292 - val_accuracy: 0.8687
Epoch 20/20
30/30 [==============================] - 1s 20ms/step - loss: 3.7695e-06 - accuracy: 1.0000 - val_loss: 1.6868 - val_accuracy: 0.8699
history_dict = history.history
history_dict.keys()
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
import matplotlib.pyplot as plt
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)
plt.figure(figsize=(12,7))
plt.subplot(1,2,1)
# 'bo' means blue dots
plt.plot(epochs, loss, 'bo', label='Training loss')
# 'b' means a solid blue line
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation sets - loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.subplot(1,2,2)
acc = history_dict['accuracy']
val_acc = history_dict['val_accuracy']
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation sets - accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
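The curves show the validation loss bottoming out after roughly 4-5 epochs while the training loss keeps falling: classic overfitting. Instead of reading the plot and hard-coding 4 epochs as done below, an EarlyStopping callback could stop training automatically (a sketch, not part of the original run):
from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)
# model.fit(partial_X_train, partial_y_train, epochs=20, batch_size=512,
#           validation_data=(X_val, y_val), callbacks=[early_stop])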
model = models.Sequential()
model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])
hist = model.fit(X_train, y_train, epochs=4, batch_size=512, validation_data=(X_val, y_val))  # note: X_val is a slice of X_train here, so this validation score is optimistic
results = model.evaluate(X_test, y_test)
Epoch 1/4
49/49 [==============================] - 2s 24ms/step - loss: 0.4827 - accuracy: 0.8156 - val_loss: 0.3221 - val_accuracy: 0.9048
Epoch 2/4
49/49 [==============================] - 1s 18ms/step - loss: 0.2778 - accuracy: 0.9088 - val_loss: 0.2105 - val_accuracy: 0.9367
Epoch 3/4
49/49 [==============================] - 1s 19ms/step - loss: 0.2094 - accuracy: 0.9268 - val_loss: 0.1765 - val_accuracy: 0.9422
Epoch 4/4
49/49 [==============================] - 1s 18ms/step - loss: 0.1763 - accuracy: 0.9392 - val_loss: 0.1520 - val_accuracy: 0.9503
782/782 [==============================] - 2s 2ms/step - loss: 0.2963 - accuracy: 0.8814
results
[0.2962694466114044, 0.8813999891281128]
history_dict = hist.history
history_dict.keys()
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
acc = hist.history['accuracy']
val_acc = hist.history['val_accuracy']
loss = hist.history['loss']
val_loss = hist.history['val_loss']
epochs = range(1, len(acc) + 1)
plt.figure(figsize=(12,7))
plt.subplot(1,2,1)
# 'bo' means blue dots
plt.plot(epochs, loss, 'bo', label='Training loss')
# 'b' means a solid blue line
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation sets - loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.subplot(1,2,2)
acc = history_dict['accuracy']
val_acc = history_dict['val_accuracy']
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation sets - accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
model.predict(X_test)
782/782 [==============================] - 2s 2ms/step
array([[0.2491391 ],
       [0.9993358 ],
       [0.92614776],
       ...,
       [0.15147226],
       [0.11113814],
       [0.67956156]], dtype=float32)
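predict() returns sigmoid probabilities; hard 0/1 labels come from thresholding at 0.5 (a sketch reusing the predictions above):
probs = model.predict(X_test)
pred_labels = (probs > 0.5).astype('int32')  # 1 = predicted positive, 0 = negative
print(pred_labels[:5].ravel())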
model = models.Sequential()
model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.compile(optimizer=optimizers.RMSprop(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])
hist2 = model.fit(partial_X_train,
                  partial_y_train,
                  epochs=20,
                  batch_size=128,
                  validation_data=(X_val, y_val))
Epoch 1/20
118/118 [==============================] - 2s 11ms/step - loss: 0.4115 - accuracy: 0.8371 - val_loss: 0.2875 - val_accuracy: 0.8887
Epoch 2/20
118/118 [==============================] - 1s 9ms/step - loss: 0.2118 - accuracy: 0.9223 - val_loss: 0.2787 - val_accuracy: 0.8876
Epoch 3/20
118/118 [==============================] - 1s 9ms/step - loss: 0.1554 - accuracy: 0.9413 - val_loss: 0.3085 - val_accuracy: 0.8834
Epoch 4/20
118/118 [==============================] - 1s 9ms/step - loss: 0.1187 - accuracy: 0.9581 - val_loss: 0.3329 - val_accuracy: 0.8812
Epoch 5/20
118/118 [==============================] - 1s 9ms/step - loss: 0.0897 - accuracy: 0.9687 - val_loss: 0.3767 - val_accuracy: 0.8758
Epoch 6/20
118/118 [==============================] - 1s 9ms/step - loss: 0.0675 - accuracy: 0.9771 - val_loss: 0.4446 - val_accuracy: 0.8713
Epoch 7/20
118/118 [==============================] - 1s 9ms/step - loss: 0.0480 - accuracy: 0.9843 - val_loss: 0.5111 - val_accuracy: 0.8693
Epoch 8/20
118/118 [==============================] - 1s 9ms/step - loss: 0.0339 - accuracy: 0.9887 - val_loss: 0.5585 - val_accuracy: 0.8676
Epoch 9/20
118/118 [==============================] - 1s 9ms/step - loss: 0.0233 - accuracy: 0.9923 - val_loss: 0.6355 - val_accuracy: 0.8644
Epoch 10/20
118/118 [==============================] - 1s 9ms/step - loss: 0.0147 - accuracy: 0.9957 - val_loss: 0.7187 - val_accuracy: 0.8601
Epoch 11/20
118/118 [==============================] - 1s 9ms/step - loss: 0.0099 - accuracy: 0.9977 - val_loss: 0.8024 - val_accuracy: 0.8609
Epoch 12/20
118/118 [==============================] - 1s 9ms/step - loss: 0.0052 - accuracy: 0.9989 - val_loss: 0.9183 - val_accuracy: 0.8602
Epoch 13/20
118/118 [==============================] - 1s 9ms/step - loss: 0.0029 - accuracy: 0.9995 - val_loss: 1.0095 - val_accuracy: 0.8553
Epoch 14/20
118/118 [==============================] - 1s 9ms/step - loss: 0.0018 - accuracy: 0.9996 - val_loss: 1.1316 - val_accuracy: 0.8581
Epoch 15/20
118/118 [==============================] - 1s 9ms/step - loss: 8.1083e-04 - accuracy: 0.9999 - val_loss: 1.2461 - val_accuracy: 0.8585
Epoch 16/20
118/118 [==============================] - 1s 9ms/step - loss: 3.3346e-04 - accuracy: 1.0000 - val_loss: 1.3680 - val_accuracy: 0.8563
Epoch 17/20
118/118 [==============================] - 1s 9ms/step - loss: 4.4051e-04 - accuracy: 0.9999 - val_loss: 1.5174 - val_accuracy: 0.8544
Epoch 18/20
118/118 [==============================] - 1s 9ms/step - loss: 5.0479e-05 - accuracy: 1.0000 - val_loss: 1.6037 - val_accuracy: 0.8543
Epoch 19/20
118/118 [==============================] - 1s 9ms/step - loss: 1.6217e-04 - accuracy: 1.0000 - val_loss: 1.7044 - val_accuracy: 0.8563
Epoch 20/20
118/118 [==============================] - 1s 9ms/step - loss: 1.1381e-04 - accuracy: 0.9999 - val_loss: 1.7591 - val_accuracy: 0.8558
history_dict = hist2.history
history_dict.keys()
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
import matplotlib.pyplot as plt
acc = hist2.history['accuracy']
val_acc = hist2.history['val_accuracy']
loss = hist2.history['loss']
val_loss = hist2.history['val_loss']
epochs = range(1, len(acc) + 1)
plt.figure(figsize=(12,7))
plt.subplot(1,2,1)
# 'bo' means blue dots
plt.plot(epochs, loss, 'bo', label='Training loss')
# 'b' means a solid blue line
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation data - loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.subplot(1,2,2)
acc = history_dict['accuracy']
val_acc = history_dict['val_accuracy']
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation data - accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
model = models.Sequential()
model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.compile(optimizer=optimizers.RMSprop(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.fit(X_train, y_train, epochs=4, batch_size=128)
results = model.evaluate(X_test, y_test)
print("에폭수 : ", epochs, "배치사이즈 :", 128)
print("loss , accuracy", results)
Epoch 1/4
196/196 [==============================] - 2s 6ms/step - loss: 0.3465 - accuracy: 0.8613
Epoch 2/4
196/196 [==============================] - 1s 7ms/step - loss: 0.2074 - accuracy: 0.9223
Epoch 3/4
196/196 [==============================] - 1s 7ms/step - loss: 0.1677 - accuracy: 0.9380
Epoch 4/4
196/196 [==============================] - 1s 6ms/step - loss: 0.1435 - accuracy: 0.9485
782/782 [==============================] - 2s 2ms/step - loss: 0.3528 - accuracy: 0.8730
Epochs :  range(1, 21) Batch size : 128
loss , accuracy [0.3528025448322296, 0.8730400204658508]
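Finally, a brand-new review can be scored by encoding it the same way the dataset was encoded. The helper below is a sketch, not part of the original notebook: word_index is 1-based, imdb.load_data shifts every index by 3, and index 2 marks out-of-vocabulary words:
def encode_review(text, dimension=10000):
    # unknown words get word_index -1, i.e. -1 + 3 = 2, the out-of-vocabulary marker
    indices = [word_index.get(word, -1) + 3 for word in text.lower().split()]
    indices = [i if i < dimension else 2 for i in indices]  # clip rare words to OOV
    return vectorize_sequences([indices], dimension)

print(model.predict(encode_review("this film was just brilliant")))  # close to 1.0 => positive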