영화 리뷰 데이터 분류하기¶

학습 내용¶

  • 가장 자주 등장하는 단어 10,000개로 제한한 영화 리뷰 데이터를 활용하여 긍정/부정을 예측하는 이진 분류 신경망을 구현해 본다.
  • 이를 위해 필요한 데이터 전처리 방법을 배워 본다.

데이터 셋¶

  • IMDB 데이터 셋
    • 긍정/부정 리뷰 총 5만 개로 구성
    • 이 데이터셋은 훈련 데이터 25,000개와 테스트 데이터 25,000개로 나뉘어 있고 각각 50%는 부정, 50%는 긍정 리뷰로 구성
    • 스탠포드 대학의 앤드류 마스(Andrew Maas)가 수집한 데이터 셋

목차

1-1 데이터 준비 및 초기 설정
1-2 데이터 전처리
1-3 신경망 모델 만들기
1-4 적절한 Epoch 돌리기

1-1 데이터 준비 및 초기 설정

목차로 이동하기

In [1]:
### 한글 폰트 설정
import matplotlib
from matplotlib import font_manager, rc
import matplotlib.pyplot as plt
import platform

if platform.system() == "Windows":
    path = "C:/Windows/Fonts/malgun.ttf"   # 윈도우 기본 한글 폰트(맑은 고딕)
    font_name = font_manager.FontProperties(fname=path).get_name()
    rc('font', family=font_name)
elif platform.system() == "Darwin":
    rc('font', family='AppleGothic')
else:
    print("Unknown System")
In [2]:
import tensorflow as tf
import keras

print(tf.__version__)
print(keras.__version__)
2.9.1
2.9.0
In [3]:
from keras.datasets import imdb

(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)
  • 매개변수 num_words=10000은 훈련 데이터에서 가장 자주 나타나는 단어 10,000개만 사용하겠다는 의미. 드물게 나타나는 단어는 무시
  • train_data와 test_data는 리뷰 내용을 숫자(단어 인덱스)의 리스트로 표현
  • train_labels와 test_labels는 레이블 (긍정 : 1, 부정 : 0)
In [4]:
print(train_data.shape, train_labels.shape)
print(test_data.shape, test_labels.shape)
(25000,) (25000,)
(25000,) (25000,)
  • train_data의 각 원소는 여러 개의 단어로 이루어진 하나의 리뷰이며, 각 단어는 매칭된 word index 값으로 표현됨.
  • train_labels는 1(긍정) 또는 0(부정)이 됨.

train_data의 리뷰 하나 살펴보기¶

In [5]:
# train_data의 첫 번째 리뷰(파이썬 리스트)에서 자료형, 길이, 앞 15개 인덱스 확인해 보기
print(type(train_data[0]),  len(train_data[0]))  # 자료형과 개수
print("하나의 리뷰 단어 개수 : ", len(train_data[0]))
train_data[0][0:15]
<class 'list'> 218
하나의 리뷰 단어 개수 :  218
Out[5]:
[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4]
In [6]:
# 5개의 긍정/부정 확인
train_labels[0:5]
Out[6]:
array([1, 0, 0, 1, 0], dtype=int64)

각 리뷰에 등장하는 단어 인덱스의 최대값은 얼마인가?¶

  • 자주 등장하는 단어 10,000개로 제한했기 때문에 인덱스는 9999를 넘지 않는다.
In [7]:
[max(sequence) for sequence in train_data][0:10] # 10개 리뷰의 각 리뷰의 단어 인덱스의 최대값
Out[7]:
[7486, 9837, 6905, 9941, 7224, 7982, 9363, 9820, 7612, 8419]
In [8]:
max([max(sequence) for sequence in train_data])
Out[8]:
9999

영화 리뷰 데이터 하나를 영어 단어로 구성된 문장으로 변경해 보기¶

  • imdb.get_word_index() : {단어: 정수 인덱스} 형태의 딕셔너리를 반환한다.
In [9]:
# word_index는 단어와 정수 인덱스를 매핑한 딕셔너리입니다
word_index = imdb.get_word_index()

# 전체 단어:인덱스 쌍의 수
print( len(word_index) )  # 88584개
list_word_index = list([ (value, key) for (key, value) in word_index.items() ])
list_word_index[0:10]
88584
Out[9]:
[(34701, 'fawn'),
 (52006, 'tsukino'),
 (52007, 'nunnery'),
 (16816, 'sonja'),
 (63951, 'vani'),
 (1408, 'woods'),
 (16115, 'spiders'),
 (2345, 'hanging'),
 (2289, 'woody'),
 (52008, 'trawling')]

인덱스별 단어들¶

In [10]:
# 정수 인덱스와 단어를 매핑하도록 뒤집습니다
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
reverse_word_index
Out[10]:
{34701: 'fawn',
 52006: 'tsukino',
 52007: 'nunnery',
 16816: 'sonja',
 63951: 'vani',
 1408: 'woods',
 16115: 'spiders',
 2345: 'hanging',
 2289: 'woody',
 52008: 'trawling',
 ...}

파이썬에서 reverse_word_index가 가지는 기능 확인¶

In [11]:
print(type(reverse_word_index))
dir(reverse_word_index)[-11:]   # 딕셔너리 기능 확인
<class 'dict'>
Out[11]:
['clear',
 'copy',
 'fromkeys',
 'get',
 'items',
 'keys',
 'pop',
 'popitem',
 'setdefault',
 'update',
 'values']

인덱스 2번 단어 얻기¶

In [12]:
# reverse_word_index.get(인덱스)       # 인덱스에 해당되는 단어가 출력
# reverse_word_index.get(인덱스, '?')  # 인덱스에 해당되는 단어가 출력되는데, 단어가 없으면 ? 출력
for i in range(0, 50, 1):
    print( reverse_word_index.get(i, '?'), end= "   " )
    
print("train 데이터 첫번째 3단어 살펴보기")
print("index 14 : ", reverse_word_index.get(14-3, '?'))
print("index 22 : ", reverse_word_index.get(22-3, '?'))
print("index 16 : ", reverse_word_index.get(16-3, '?'))
?   the   and   a   of   to   is   br   in   it   i   this   that   was   as   for   with   movie   but   film   on   not   you   are   his   have   he   be   one   all   at   by   an   they   who   so   from   like   her   or   just   about   it's   out   has   if   some   there   what   good   train 데이터 첫번째 3단어 살펴보기
index 14 :  this
index 22 :  film
index 16 :  was

첫번째 리뷰에 대해 확인해 보자.¶

In [13]:
print( train_data[0]  ) # 첫번째 리뷰(숫자 인덱스)
[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]
In [14]:
# train_data[0] : 하나의 리뷰(1, 14, 22, 16, ...) => 218개의 단어 인덱스
# [reverse_word_index.get(i - 3, '?') for i in train_data[0]]
print(len( train_data[0]) )  # 첫번째 리뷰는 218개 인덱스(단어)로 이루어져 있다.
print([i for i in train_data[0]])  # train_data[0]의 인덱스를 for문으로 순회하며 리스트로 만든다.
218
[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]
In [15]:
# 두번째 리뷰 - 189개 인덱스(단어)로 이루어져 있고, 이에 대한 리스트를 만들어 출력해 본다.
print(len( train_data[1] ))  # 두번째 리뷰는 189개 인덱스(단어)로 이루어져 있다.
print([i for i in train_data[1] ])  # train_data[1]의 인덱스를 for문으로 순회하며 리스트로 만든다.
189
[1, 194, 1153, 194, 8255, 78, 228, 5, 6, 1463, 4369, 5012, 134, 26, 4, 715, 8, 118, 1634, 14, 394, 20, 13, 119, 954, 189, 102, 5, 207, 110, 3103, 21, 14, 69, 188, 8, 30, 23, 7, 4, 249, 126, 93, 4, 114, 9, 2300, 1523, 5, 647, 4, 116, 9, 35, 8163, 4, 229, 9, 340, 1322, 4, 118, 9, 4, 130, 4901, 19, 4, 1002, 5, 89, 29, 952, 46, 37, 4, 455, 9, 45, 43, 38, 1543, 1905, 398, 4, 1649, 26, 6853, 5, 163, 11, 3215, 2, 4, 1153, 9, 194, 775, 7, 8255, 2, 349, 2637, 148, 605, 2, 8003, 15, 123, 125, 68, 2, 6853, 15, 349, 165, 4362, 98, 5, 4, 228, 9, 43, 2, 1157, 15, 299, 120, 5, 120, 174, 11, 220, 175, 136, 50, 9, 4373, 228, 8255, 5, 2, 656, 245, 2350, 5, 4, 9837, 131, 152, 491, 18, 2, 32, 7464, 1212, 14, 9, 6, 371, 78, 22, 625, 64, 1382, 9, 8, 168, 145, 23, 4, 1690, 15, 16, 4, 1355, 5, 28, 6, 52, 154, 462, 33, 89, 78, 285, 16, 145, 95]
In [16]:
# [reverse_word_index.get(i - 3, '?') for i in train_data[0]]
# 의미 : 218개의 인덱스 각각에 대응하는 단어를 찾아 리스트 형태로 만든다.
print("첫번째 리뷰의 인덱스를 단어로 매칭시켜서 보여준 것")
print([reverse_word_index.get(i - 3, '?') for i in train_data[0]])
# ' '.join([reverse_word_index.get(i - 3, '?') for i in train_data[0]])
# 각각의 단어 리스트에 공백 하나를 넣어 주면서 하나의 문자열로 묶어 준 것.
첫번째 리뷰의 인덱스를 단어로 매칭시켜서 보여준 것
['?', 'this', 'film', 'was', 'just', 'brilliant', 'casting', 'location', 'scenery', 'story', 'direction', "everyone's", 'really', 'suited', 'the', 'part', 'they', 'played', 'and', 'you', 'could', 'just', 'imagine', 'being', 'there', 'robert', '?', 'is', 'an', 'amazing', 'actor', 'and', 'now', 'the', 'same', 'being', 'director', '?', 'father', 'came', 'from', 'the', 'same', 'scottish', 'island', 'as', 'myself', 'so', 'i', 'loved', 'the', 'fact', 'there', 'was', 'a', 'real', 'connection', 'with', 'this', 'film', 'the', 'witty', 'remarks', 'throughout', 'the', 'film', 'were', 'great', 'it', 'was', 'just', 'brilliant', 'so', 'much', 'that', 'i', 'bought', 'the', 'film', 'as', 'soon', 'as', 'it', 'was', 'released', 'for', '?', 'and', 'would', 'recommend', 'it', 'to', 'everyone', 'to', 'watch', 'and', 'the', 'fly', 'fishing', 'was', 'amazing', 'really', 'cried', 'at', 'the', 'end', 'it', 'was', 'so', 'sad', 'and', 'you', 'know', 'what', 'they', 'say', 'if', 'you', 'cry', 'at', 'a', 'film', 'it', 'must', 'have', 'been', 'good', 'and', 'this', 'definitely', 'was', 'also', '?', 'to', 'the', 'two', 'little', "boy's", 'that', 'played', 'the', '?', 'of', 'norman', 'and', 'paul', 'they', 'were', 'just', 'brilliant', 'children', 'are', 'often', 'left', 'out', 'of', 'the', '?', 'list', 'i', 'think', 'because', 'the', 'stars', 'that', 'play', 'them', 'all', 'grown', 'up', 'are', 'such', 'a', 'big', 'profile', 'for', 'the', 'whole', 'film', 'but', 'these', 'children', 'are', 'amazing', 'and', 'should', 'be', 'praised', 'for', 'what', 'they', 'have', 'done', "don't", 'you', 'think', 'the', 'whole', 'story', 'was', 'so', 'lovely', 'because', 'it', 'was', 'true', 'and', 'was', "someone's", 'life', 'after', 'all', 'that', 'was', 'shared', 'with', 'us', 'all']
  • 0, 1, 2는 각각 '패딩', '문서 시작', '사전에 없음'을 위한 인덱스이므로 3을 빼 준다. (이 오프셋이 생기는 이유는 아래 스케치 참고)
  • 첫번째 리뷰의 인덱스를 하나씩 확인한 후, 매칭되는 단어를 추가. 없으면 '?'를 표시
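참고로 이 오프셋 3은 imdb.load_data()의 기본 매개변수에서 나온다. 아래는 해당 기본값(케라스 문서의 기본값으로 가정)을 명시적으로 적어 본 스케치이다.

# imdb.load_data()의 기본 매개변수를 명시해 본 스케치 (start_char, oov_char, index_from은 케라스 기본값)
from keras.datasets import imdb

(train_data, train_labels), (test_data, test_labels) = imdb.load_data(
    num_words=10000,   # 가장 자주 등장하는 단어 10,000개만 사용
    start_char=1,      # 각 리뷰의 시작을 표시하는 인덱스
    oov_char=2,        # 사전에 없는(제외된) 단어를 표시하는 인덱스
    index_from=3)      # 실제 단어 인덱스는 3만큼 밀려서 시작 -> 디코딩할 때 i - 3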

숫자로 이루어진 리뷰를 영문으로 표시해 보기¶

  • 리뷰를 디코딩
In [17]:
# 0, 1, 2는 '패딩', '문서 시작', '사전에 없음'을 위한 인덱스이므로 3을 뺍니다
decoded_review = ' '.join([reverse_word_index.get(i - 3, '?') for i in train_data[0]])
decoded_review
Out[17]:
"? this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert ? is an amazing actor and now the same being director ? father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for ? and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also ? to the two little boy's that played the ? of norman and paul they were just brilliant children are often left out of the ? list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for what they have done don't you think the whole story was so lovely because it was true and was someone's life after all that was shared with us all"

실습해보기¶

  • 5번째 리뷰를 영어 문장으로 바꿔서 표현해 보자. (아래에 풀이 스케치가 있다)
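아래는 이 실습에 대한 하나의 풀이 스케치이다. '5번째 리뷰'는 train_data[4]를 가리킨다고 가정한다.

# 다섯 번째 리뷰(train_data[4])를 영어 문장으로 디코딩해 보는 스케치
decoded_review_5 = ' '.join([reverse_word_index.get(i - 3, '?') for i in train_data[4]])
print("다섯 번째 리뷰의 단어 수 : ", len(train_data[4]))
print(decoded_review_5)
print("레이블(1:긍정, 0:부정) : ", train_labels[4])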

모델에 사용하기 위해 데이터 전처리-벡터화¶

리뷰 데이터는 다음과 같이 표현한다.¶

  • 원핫 인코딩 예시

    • 전체 10개 단어 : I, an, like, apple, korea, a, list, orange, pig, cat
    • 각 단어의 인덱스 : 3 4 5 6 7 8 9 10 11 12
    • 초기 벡터(모두 0) : 0 0 0 0 0 0 0 0 0 0
  • 나의 첫번째 리뷰

    • I like an apple -> (3, 5, 4, 6)
    • 초기 벡터 : 0 0 0 0 0 0 0 0 0 0
    • 인코딩 결과 : 1 1 1 1 0 0 0 0 0 0
  • 나의 두번째 리뷰

    • I like an orange -> (3, 5, 4, 10)
    • 인코딩 결과 : 1 1 1 0 0 0 0 1 0 0
  • 나의 세번째 리뷰

    • my orange is pig -> (32000, 10, 50000, 11), 32000과 50000은 사전(10개 단어) 밖의 인덱스
    • 인코딩 결과 : 0 0 0 0 0 0 0 1 1 0
  • 또 다른 표현 : (벡터 크기, 1이 되는 위치)의 쌍으로 나타내기

    • 나의 첫번째 리뷰
    • I like an apple -> 1 1 1 1 0 0 0 0 0 0
    • (10, 0), (10, 1), (10, 2), (10, 3)
  • 나의 세번째 리뷰

    • my orange is pig -> 0 0 0 0 0 0 0 1 1 0
    • (10, 7), (10, 8)
  • 위 예시를 코드로 옮겨 본 스케치가 아래에 있다.
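위 표현을 그대로 코드로 옮겨 본 장난감 예제 스케치이다. 10개 단어의 인덱스를 3~12로, 벡터의 위치를 0~9로 대응시킨다고 가정한다.

import numpy as np

# 10개 단어 사전(인덱스 3~12)을 위치 0~9에 대응시키는 장난감 원핫 인코딩 스케치
def toy_one_hot(review, index_offset=3, dimension=10):
    vec = np.zeros(dimension)
    for idx in review:
        pos = idx - index_offset
        if 0 <= pos < dimension:   # 사전 범위 안의 단어만 1로 표시
            vec[pos] = 1.
    return vec

print(toy_one_hot([3, 5, 4, 6]))           # I like an apple  -> [1. 1. 1. 1. 0. 0. 0. 0. 0. 0.]
print(toy_one_hot([32000, 10, 50000, 11])) # my orange is pig -> [0. 0. 0. 0. 0. 0. 0. 1. 1. 0.]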

1-2 데이터 전처리

목차로 이동하기

  • 리스트를 원-핫 인코딩하여 0과 1의 벡터로 변환합니다.
  • [3, 5]를 인덱스 3과 5의 위치는 1이고 그 외는 모두 0인 10,000차원의 벡터로 각각 변환
In [18]:
import numpy as np

def vectorize_sequences(sequences, dimension=10000):
    # 크기가 (len(sequences), dimension))이고 모든 원소가 0인 행렬을 만듭니다
    results = np.zeros((len(sequences), dimension))
    
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.  # results[i]에서 특정 인덱스의 위치를 1로 만듭니다
    return results
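정의한 함수가 의도대로 동작하는지 아주 작은 입력으로 확인해 보면 다음과 같다(위에서 정의한 vectorize_sequences를 그대로 사용한다고 가정한 스케치).

# [3, 5]는 인덱스 3과 5의 위치만 1인 10,000차원 벡터가 된다
sample = vectorize_sequences([[3, 5], [1, 9999]])
print(sample.shape)                              # (2, 10000)
print(sample[0, 3], sample[0, 5], sample[0, 0])  # 1.0 1.0 0.0
print(sample[1, 1], sample[1, 9999])             # 1.0 1.0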

학습 데이터를 벡터로 변환¶

  • 입력 데이터(리뷰 문장)를 1D 배열 (25000,) (각 원소는 가변 길이의 인덱스 리스트)에서 2D 배열 (25000, 10000)으로 변경
In [19]:
print("변환 전 : ", train_data.shape)
X_train = vectorize_sequences(train_data)
print("변환 후 : ", X_train.shape)
X_train
변환 전 :  (25000,)
변환 후 :  (25000, 10000)
Out[19]:
array([[0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       ...,
       [0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.]])

테스트 데이터를 벡터로 변환¶

In [20]:
print("변환 전 : ",test_data.shape)
X_test = vectorize_sequences(test_data)
print("변환 후 : ",X_test.shape)
X_test
변환 전 :  (25000,)
변환 후 :  (25000, 10000)
Out[20]:
array([[0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       ...,
       [0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.]])

레이블을 벡터로 바꾸기¶

  • 리스트를 배열로 만들어준다. (np.asarray() )
In [21]:
a = [1,2]
np.asarray(a)
Out[21]:
array([1, 2])
In [22]:
print(type(train_labels), type(test_labels))
print(train_labels.shape, test_labels.shape)
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(25000,) (25000,)
In [23]:
y_train = np.asarray(train_labels).astype('float32')
y_test = np.asarray(test_labels).astype('float32')

print(type(y_train), type(y_test))
y_train.shape, y_test.shape
<class 'numpy.ndarray'> <class 'numpy.ndarray'>
Out[23]:
((25000,), (25000,))
In [24]:
y_train
Out[24]:
array([1., 0., 0., ..., 0., 1., 0.], dtype=float32)
In [25]:
y_test
Out[25]:
array([0., 1., 1., ..., 0., 0., 0.], dtype=float32)

1-3 신경망 모델 만들기

목차로 이동하기

  • 얼마나 많은 은닉층 사용할 것인가?
  • 각 층에 얼마나 많은 은닉 유닛을 둘 것인가?

3-layer network

입력 데이터가 벡터(1D)이고 레이블은 스칼라(1 또는 0)입니다.¶

실습해 보기¶

  • (1) 딥러닝 모델 만들어보기

활성화 함수(Activation)가 필요한 이유¶

  • 선형 층만 깊게 쌓으면 결국 하나의 선형 연산과 같다. 층을 여러 개로 구성하는 장점이 없다.
  • 층을 깊게 쌓는 장점을 살려 표현을 풍부하게 만들려면 비선형성, 즉 활성화 함수를 추가해야 한다. (아래의 작은 수치 예 참고)
  • relu는 가장 인기 있는 활성화 함수 중 하나이다.
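선형 층만 쌓으면 결국 하나의 행렬 곱과 같다는 점을 작은 수치 예로 확인해 본 스케치이다(행렬 W1, W2와 입력 x는 임의로 가정한 값).

import numpy as np

# 두 개의 선형 층 : W2 @ (W1 @ x) == (W2 @ W1) @ x -> 하나의 선형 변환으로 합쳐진다
W1 = np.array([[1., 2.], [0., 1.]])
W2 = np.array([[3., 0.], [1., 1.]])
x  = np.array([1., -2.])

print(W2 @ (W1 @ x))      # 두 층을 차례로 통과한 결과
print((W2 @ W1) @ x)      # 미리 곱해 둔 하나의 행렬을 통과한 결과 (동일)

# 사이에 relu(x) = max(x, 0) 같은 비선형성을 넣으면 더 이상 하나의 선형 변환으로 줄어들지 않는다
relu = lambda v: np.maximum(v, 0.)
print(W2 @ relu(W1 @ x))  # 위 두 결과와 달라진다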
In [26]:
from keras import models
from keras import layers

model = models.Sequential()
model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

손실함수와 optimizer(최적화 함수)를 선택¶

In [27]:
model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])

옵티마이저의 매개변수를 직접 설정해야 할 때¶

In [28]:
from tensorflow.keras import optimizers

model.compile(optimizer=optimizers.RMSprop(learning_rate=0.01),
              loss='binary_crossentropy',
              metrics=['accuracy']  )

다음과 같이 손실 함수와 측정 지표를 객체로 직접 지정하는 것도 가능함.¶

from keras import losses
from keras import metrics

model.compile(optimizer=optimizers.RMSprop(learning_rate=0.001),
              loss=losses.binary_crossentropy,
              metrics=[metrics.binary_accuracy])

학습용 데이터 나누기¶

  • 학습하는 동안 처음 본 데이터에 대한 모델의 정확도를 측정하기 위해 원본 학습 데이터에서 10,000개의 샘플을 떼어 검증 세트를 만들기
In [29]:
X_train.shape, y_train.shape
Out[29]:
((25000, 10000), (25000,))
  • 처음 10000개까지를 검증용, 이후의 15000개를 학습용으로 이용
In [30]:
X_val = X_train[:10000]            # 자체 검증
partial_X_train = X_train[10000:]  # 학습용 10000~25000

y_val = y_train[:10000]            # 자체 검증
partial_y_train = y_train[10000:]  # 학습용 10000~25000

학습¶

In [31]:
history = model.fit(partial_X_train,
                    partial_y_train,
                    epochs=20,
                    batch_size=512,
                    validation_data=(X_val, y_val))
Epoch 1/20
30/30 [==============================] - 2s 33ms/step - loss: 0.6185 - accuracy: 0.7185 - val_loss: 0.5058 - val_accuracy: 0.7568
Epoch 2/20
30/30 [==============================] - 1s 22ms/step - loss: 0.3194 - accuracy: 0.8754 - val_loss: 0.3907 - val_accuracy: 0.8449
Epoch 3/20
30/30 [==============================] - 1s 22ms/step - loss: 0.2129 - accuracy: 0.9161 - val_loss: 0.4216 - val_accuracy: 0.8510
Epoch 4/20
30/30 [==============================] - 1s 22ms/step - loss: 0.1684 - accuracy: 0.9364 - val_loss: 0.4502 - val_accuracy: 0.8439
Epoch 5/20
30/30 [==============================] - 1s 22ms/step - loss: 0.1206 - accuracy: 0.9571 - val_loss: 0.3564 - val_accuracy: 0.8818
Epoch 6/20
30/30 [==============================] - 1s 22ms/step - loss: 0.0937 - accuracy: 0.9696 - val_loss: 0.5479 - val_accuracy: 0.8514
Epoch 7/20
30/30 [==============================] - 1s 22ms/step - loss: 0.1148 - accuracy: 0.9612 - val_loss: 0.3717 - val_accuracy: 0.8778
Epoch 8/20
30/30 [==============================] - 1s 23ms/step - loss: 0.0657 - accuracy: 0.9793 - val_loss: 0.4201 - val_accuracy: 0.8771
Epoch 9/20
30/30 [==============================] - 1s 23ms/step - loss: 0.0197 - accuracy: 0.9941 - val_loss: 0.6530 - val_accuracy: 0.8678
Epoch 10/20
30/30 [==============================] - 1s 22ms/step - loss: 0.0907 - accuracy: 0.9826 - val_loss: 0.6377 - val_accuracy: 0.8759
Epoch 11/20
30/30 [==============================] - 1s 23ms/step - loss: 0.0054 - accuracy: 0.9993 - val_loss: 0.7571 - val_accuracy: 0.8730
Epoch 12/20
30/30 [==============================] - 1s 22ms/step - loss: 0.0912 - accuracy: 0.9839 - val_loss: 0.7324 - val_accuracy: 0.8717
Epoch 13/20
30/30 [==============================] - 1s 22ms/step - loss: 0.0027 - accuracy: 0.9998 - val_loss: 0.8392 - val_accuracy: 0.8735
Epoch 14/20
30/30 [==============================] - 1s 22ms/step - loss: 0.0015 - accuracy: 0.9997 - val_loss: 0.9948 - val_accuracy: 0.8704
Epoch 15/20
30/30 [==============================] - 1s 22ms/step - loss: 0.1047 - accuracy: 0.9872 - val_loss: 0.9747 - val_accuracy: 0.8626
Epoch 16/20
30/30 [==============================] - 1s 22ms/step - loss: 0.0015 - accuracy: 0.9999 - val_loss: 1.0148 - val_accuracy: 0.8699
Epoch 17/20
30/30 [==============================] - 1s 22ms/step - loss: 4.2119e-04 - accuracy: 0.9999 - val_loss: 1.2154 - val_accuracy: 0.8697
Epoch 18/20
30/30 [==============================] - 1s 23ms/step - loss: 2.1195e-04 - accuracy: 0.9999 - val_loss: 1.3966 - val_accuracy: 0.8689
Epoch 19/20
30/30 [==============================] - 1s 22ms/step - loss: 1.5039e-04 - accuracy: 0.9999 - val_loss: 1.5292 - val_accuracy: 0.8687
Epoch 20/20
30/30 [==============================] - 1s 20ms/step - loss: 3.7695e-06 - accuracy: 1.0000 - val_loss: 1.6868 - val_accuracy: 0.8699
In [32]:
history_dict = history.history
history_dict.keys()
Out[32]:
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
In [33]:
import matplotlib.pyplot as plt

학습 내용 시각화를 통해 확인하기¶

In [34]:
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(1, len(acc) + 1)

plt.figure(figsize=(12,7))
plt.subplot(1,2,1)
# ‘bo’는 파란색 점을 의미합니다
plt.plot(epochs, loss, 'bo', label='Training loss')
# ‘b’는 파란색 실선을 의미합니다
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('학습과 평가 데이터 셋 - 손실')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1,2,2)
acc = history_dict['accuracy']
val_acc = history_dict['val_accuracy']

plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('학습과 평가 데이터 셋 - 정확도')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()


plt.show()
  • 확인
    • 학습 손실은 에포크마다 감소하고, 학습 정확도는 에포크마다 증가한다.
    • 반면 두번째 에포크 이후부터는 학습 데이터에 과도하게 최적화되어 학습 데이터에 특화된 표현을 학습하면서 검증 손실이 나빠진다. (과대적합 경향, 아래 EarlyStopping 스케치 참고)
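과대적합이 시작되는 시점에서 학습을 자동으로 멈추는 한 가지 방법으로 keras의 EarlyStopping 콜백을 쓸 수 있다. 아래는 새로 만든 모델에 적용한다고 가정한 스케치이며, patience 등의 값은 임의로 정한 것이다.

from tensorflow.keras.callbacks import EarlyStopping

# val_loss가 2 에포크 동안 좋아지지 않으면 학습을 멈추고, 가장 좋았던 시점의 가중치를 복원
early_stop = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

history = model.fit(partial_X_train, partial_y_train,
                    epochs=20, batch_size=512,
                    validation_data=(X_val, y_val),
                    callbacks=[early_stop])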

실습¶

  • 신경망을 약간 변경한 후, 4 에포크만 훈련시켜 보기

1-4 적절한 Epoch 돌리기

  • 처음부터 다시 새로운 신경망을 4번의 에포크 동안만 훈련하고 테스트 데이터에서 평가

목차로 이동하기

  • 모델을 훈련시킨 후에 이를 실전 환경에서 사용하고 싶다. predict 메서드를 사용해서 어떤 리뷰가 긍정일 확률을 예측할 수 있다.
In [39]:
model = models.Sequential()
model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# 주의: 여기서는 전체 X_train으로 학습하므로 X_val(X_train의 앞 10,000개)이 학습 데이터와 겹쳐 검증 정확도가 다소 낙관적으로 나온다.
hist = model.fit(X_train, y_train, epochs=4, batch_size=512, validation_data=(X_val, y_val))
results = model.evaluate(X_test, y_test)
Epoch 1/4
49/49 [==============================] - 2s 24ms/step - loss: 0.4827 - accuracy: 0.8156 - val_loss: 0.3221 - val_accuracy: 0.9048
Epoch 2/4
49/49 [==============================] - 1s 18ms/step - loss: 0.2778 - accuracy: 0.9088 - val_loss: 0.2105 - val_accuracy: 0.9367
Epoch 3/4
49/49 [==============================] - 1s 19ms/step - loss: 0.2094 - accuracy: 0.9268 - val_loss: 0.1765 - val_accuracy: 0.9422
Epoch 4/4
49/49 [==============================] - 1s 18ms/step - loss: 0.1763 - accuracy: 0.9392 - val_loss: 0.1520 - val_accuracy: 0.9503
782/782 [==============================] - 2s 2ms/step - loss: 0.2963 - accuracy: 0.8814
In [40]:
results
Out[40]:
[0.2962694466114044, 0.8813999891281128]

학습 내용 확인¶

In [41]:
history_dict = hist.history
history_dict.keys()
Out[41]:
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
In [42]:
acc = hist.history['accuracy']
val_acc = hist.history['val_accuracy']
loss = hist.history['loss']
val_loss = hist.history['val_loss']

epochs = range(1, len(acc) + 1)

plt.figure(figsize=(12,7))
plt.subplot(1,2,1)
# ‘bo’는 파란색 점을 의미합니다
plt.plot(epochs, loss, 'bo', label='Training loss')
# ‘b’는 파란색 실선을 의미합니다
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('학습과 평가 데이터 셋 - 손실')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1,2,2)
acc = history_dict['accuracy']
val_acc = history_dict['val_accuracy']

plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('학습과 평가 데이터 셋 - 정확도')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()


plt.show()

최종 예측 수행¶

In [43]:
model.predict(X_test)
782/782 [==============================] - 2s 2ms/step
Out[43]:
array([[0.2491391 ],
       [0.9993358 ],
       [0.92614776],
       ...,
       [0.15147226],
       [0.11113814],
       [0.67956156]], dtype=float32)
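predict의 출력은 각 리뷰가 긍정일 확률이므로, 0.5를 기준으로 잘라 긍정(1)/부정(0) 레이블로 바꿀 수 있다. 아래는 이를 확인해 보는 간단한 스케치이다.

# 확률을 0.5 기준으로 잘라 0(부정)/1(긍정) 레이블로 변환해 보는 스케치
pred_prob = model.predict(X_test)
pred_label = (pred_prob > 0.5).astype('int32').reshape(-1)

print(pred_label[:10])                 # 앞 10개 리뷰의 예측 레이블
print((pred_label == y_test).mean())   # 직접 계산한 정확도 (evaluate의 accuracy와 비슷해야 함)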

추가 해보기¶

  • 1개 또는 3개의 은닉층을 사용하여 검증과 테스트 정확도 확인해 보기
  • 층의 은닉 유닛 수를 줄이거나 늘려 보기 (32개, 64개 등)
  • binary_crossentropy 대신에 mse 손실 함수 사용해 보기
  • relu 대신에 tanh 활성화 함수를 사용해 보기 (두 가지를 함께 적용한 스케치가 아래에 있다)
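아래는 위 변형들 중 mse 손실과 tanh 활성화 함수를 함께 적용해 본 하나의 스케치이다(층 구성과 노드 수는 본문 모델과 동일하다고 가정).

# mse 손실 + tanh 활성화 함수를 사용해 보는 변형 스케치
model_var = models.Sequential()
model_var.add(layers.Dense(16, activation='tanh', input_shape=(10000,)))
model_var.add(layers.Dense(16, activation='tanh'))
model_var.add(layers.Dense(1, activation='sigmoid'))   # 출력층은 확률을 위해 sigmoid 유지

model_var.compile(optimizer='rmsprop', loss='mse', metrics=['accuracy'])
model_var.fit(partial_X_train, partial_y_train,
              epochs=4, batch_size=512,
              validation_data=(X_val, y_val))
print(model_var.evaluate(X_test, y_test))   # [loss, accuracy]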

추가 실습¶

  • 01 3개의 은닉층 사용하기(노드수 동일)
  • 02 2개의 은닉층 사용하기(노드수 동일)
  • 03 은닉층 개수는 동일하게 하고 노드수 늘리기(32개)
  • 04 은닉층 개수는 동일하게 하고 노드수 늘리기(64개)
  • 05 은닉층 개수는 동일하게 하고 노드수 늘리기(128개)
  • 06 tanh활성화 함수 사용하기
  • 07 배치 사이즈 변경(512->128)
  • 학습을 시킨 이후에 그래프를 보고,
  • 적절한 에폭 수를 여러분이 지정하고,
  • 최종 결과 올리기

추가 실습 - 07 배치 사이즈 변경(512->128)¶

In [44]:
model = models.Sequential()
model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

model.compile(optimizer=optimizers.RMSprop(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

hist2 = model.fit(partial_X_train,
                    partial_y_train,
                    epochs=20,
                    batch_size=128,
                    validation_data=(X_val, y_val))
Epoch 1/20
118/118 [==============================] - 2s 11ms/step - loss: 0.4115 - accuracy: 0.8371 - val_loss: 0.2875 - val_accuracy: 0.8887
Epoch 2/20
118/118 [==============================] - 1s 9ms/step - loss: 0.2118 - accuracy: 0.9223 - val_loss: 0.2787 - val_accuracy: 0.8876
Epoch 3/20
118/118 [==============================] - 1s 9ms/step - loss: 0.1554 - accuracy: 0.9413 - val_loss: 0.3085 - val_accuracy: 0.8834
Epoch 4/20
118/118 [==============================] - 1s 9ms/step - loss: 0.1187 - accuracy: 0.9581 - val_loss: 0.3329 - val_accuracy: 0.8812
Epoch 5/20
118/118 [==============================] - 1s 9ms/step - loss: 0.0897 - accuracy: 0.9687 - val_loss: 0.3767 - val_accuracy: 0.8758
Epoch 6/20
118/118 [==============================] - 1s 9ms/step - loss: 0.0675 - accuracy: 0.9771 - val_loss: 0.4446 - val_accuracy: 0.8713
Epoch 7/20
118/118 [==============================] - 1s 9ms/step - loss: 0.0480 - accuracy: 0.9843 - val_loss: 0.5111 - val_accuracy: 0.8693
Epoch 8/20
118/118 [==============================] - 1s 9ms/step - loss: 0.0339 - accuracy: 0.9887 - val_loss: 0.5585 - val_accuracy: 0.8676
Epoch 9/20
118/118 [==============================] - 1s 9ms/step - loss: 0.0233 - accuracy: 0.9923 - val_loss: 0.6355 - val_accuracy: 0.8644
Epoch 10/20
118/118 [==============================] - 1s 9ms/step - loss: 0.0147 - accuracy: 0.9957 - val_loss: 0.7187 - val_accuracy: 0.8601
Epoch 11/20
118/118 [==============================] - 1s 9ms/step - loss: 0.0099 - accuracy: 0.9977 - val_loss: 0.8024 - val_accuracy: 0.8609
Epoch 12/20
118/118 [==============================] - 1s 9ms/step - loss: 0.0052 - accuracy: 0.9989 - val_loss: 0.9183 - val_accuracy: 0.8602
Epoch 13/20
118/118 [==============================] - 1s 9ms/step - loss: 0.0029 - accuracy: 0.9995 - val_loss: 1.0095 - val_accuracy: 0.8553
Epoch 14/20
118/118 [==============================] - 1s 9ms/step - loss: 0.0018 - accuracy: 0.9996 - val_loss: 1.1316 - val_accuracy: 0.8581
Epoch 15/20
118/118 [==============================] - 1s 9ms/step - loss: 8.1083e-04 - accuracy: 0.9999 - val_loss: 1.2461 - val_accuracy: 0.8585
Epoch 16/20
118/118 [==============================] - 1s 9ms/step - loss: 3.3346e-04 - accuracy: 1.0000 - val_loss: 1.3680 - val_accuracy: 0.8563
Epoch 17/20
118/118 [==============================] - 1s 9ms/step - loss: 4.4051e-04 - accuracy: 0.9999 - val_loss: 1.5174 - val_accuracy: 0.8544
Epoch 18/20
118/118 [==============================] - 1s 9ms/step - loss: 5.0479e-05 - accuracy: 1.0000 - val_loss: 1.6037 - val_accuracy: 0.8543
Epoch 19/20
118/118 [==============================] - 1s 9ms/step - loss: 1.6217e-04 - accuracy: 1.0000 - val_loss: 1.7044 - val_accuracy: 0.8563
Epoch 20/20
118/118 [==============================] - 1s 9ms/step - loss: 1.1381e-04 - accuracy: 0.9999 - val_loss: 1.7591 - val_accuracy: 0.8558
In [45]:
history_dict = hist2.history
history_dict.keys()
Out[45]:
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
In [46]:
import matplotlib.pyplot as plt

acc = hist2.history['accuracy']
val_acc = hist2.history['val_accuracy']
loss = hist2.history['loss']
val_loss = hist2.history['val_loss']

epochs = range(1, len(acc) + 1)

plt.figure(figsize=(12,7))
plt.subplot(1,2,1)
# ‘bo’는 파란색 점을 의미합니다
plt.plot(epochs, loss, 'bo', label='Training loss')
# ‘b’는 파란색 실선을 의미합니다
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('학습용, 평가용 데이터 - 손실')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1,2,2)
acc = history_dict['accuracy']
val_acc = history_dict['val_accuracy']

plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('학습용, 평가용 데이터 - 정확도')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

plt.show()
In [47]:
model = models.Sequential()
model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

model.compile(optimizer=optimizers.RMSprop(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.fit(X_train, y_train, epochs=4, batch_size=128)
results = model.evaluate(X_test, y_test)
print("에폭수 : ", epochs, "배치사이즈 :", 128)
print("loss , accuracy", results)
Epoch 1/4
196/196 [==============================] - 2s 6ms/step - loss: 0.3465 - accuracy: 0.8613
Epoch 2/4
196/196 [==============================] - 1s 7ms/step - loss: 0.2074 - accuracy: 0.9223
Epoch 3/4
196/196 [==============================] - 1s 7ms/step - loss: 0.1677 - accuracy: 0.9380
Epoch 4/4
196/196 [==============================] - 1s 6ms/step - loss: 0.1435 - accuracy: 0.9485
782/782 [==============================] - 2s 2ms/step - loss: 0.3528 - accuracy: 0.8730
에폭수 :  4 배치사이즈 : 128
loss , accuracy [0.3528025448322296, 0.8730400204658508]

확인(loss, accuracy)¶

  • 07 배치 사이즈 512 -> 128
    • 2.65, 0.498
  • 07 배치 사이즈 512 -> 64
    • 2.087, 0.4965
  • 06 tanh활성화 함수 사용하기
    • relu -> tanh 1.53, 0.49
  • 학습률 : 0.01 -> 0.001, epochs(4), batch_size = 128
    • 2.545, 0.496
  • 학습률 : 0.01 -> 0.001, epochs(8), batch_size = 128
    • 4.056, 0.497
  • 에폭변경 : 4epochs, batch_size = 512, 학습률 : 0.01, 노드 수: 16->128
    • loss, accuracy : 2.85, 0.496
  • 에폭변경 : 4epochs -> 6epochs, batch_size = 512, 학습률 : 0.01
    • loss, accuracy : 0.2961, 0.881
  • 에폭변경 : 4epochs -> 6epochs, batch_size = 128, 학습률 : 0.01
    • loss, accuracy : 10.03, 0.4965
  • 에폭변경 : 4epochs -> 8epochs, batch_size = 512, 학습률 : 0.01, 노드 수: 16->64
    • loss, accuracy : 3.714, 0.496

정리해보기¶

  • 원본 데이터를 신경망에 주입하기 위해서 꽤 많은 전처리가 필요.
  • 이진 분류 문제에서 네트워크는 하나의 유닛과 sigmoid 활성화 함수를 가진 Dense 층으로 끝나야 함.
    • 이 신경망의 출력은 확률을 나타내는 0과 1 사이의 스칼라 값이다.
    • 이진 분류 문제에서 이런 스칼라 시그모이드 출력에 사용할 손실 함수는 binary_crossentropy
  • 학습용 데이터에 대한 성능이 계속 향상되더라도 어느 시점부터 신경망은 과대적합되기 시작한다. 따라서 학습용 데이터 세트 이외의 데이터에서 성능을 모니터링해야 함. (전체 과정을 한 번에 정리한 스케치가 아래에 있다)
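위 내용을 한 번에 모아 본 최소한의 엔드투엔드 스케치이다(앞에서 정의한 vectorize_sequences를 그대로 사용한다고 가정).

# IMDB 이진 분류의 전체 과정을 한 번에 정리해 본 스케치
import numpy as np
from keras.datasets import imdb
from keras import models, layers

(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)

X_train = vectorize_sequences(train_data)             # (25000, 10000) 멀티-핫 벡터
X_test = vectorize_sequences(test_data)
y_train = np.asarray(train_labels).astype('float32')
y_test = np.asarray(test_labels).astype('float32')

model = models.Sequential()
model.add(layers.Dense(16, activation='relu', input_shape=(10000,)))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))      # 긍정일 확률(0~1)을 출력

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=4, batch_size=512)
print(model.evaluate(X_test, y_test))                 # [loss, accuracy]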