! A simple random vector accumulation model of word meaning.
! Each word is assigned a random Gaussian vector, and memory
! representations are formed by summing word vectors for each
! word found in the context (input already parsed into sentences).
!**********************************************************************
program Semantics
   use number_generators
   use Declare_Constants
   use Reading_Tools
   implicit none

   ! NOTE(review): D, MAX_WORDS, STOP_WORDS, WORD_CHARS, SENT_CHARS, BAD_SENT,
   ! E_FILE and the helper procedures called below are not declared in this
   ! file; they are presumably supplied by the three modules used above.

   character(len=WORD_CHARS) :: Word(MAX_WORDS), &      ! List of words as learned
                                Stoplist(STOP_WORDS)    ! List of function words to ignore
   character(len=WORD_CHARS), allocatable :: Sentence_Word(:)   ! Words of the current sentence
   character(len=SENT_CHARS) :: Sentence                ! Sentence string at a given time point
   real :: Visual(D,MAX_WORDS), &                       ! Visual representations of words
           Memory(D,MAX_WORDS), &                       ! Internal memory for words viewed
           Mu, Sigma                                    ! Mean and SD of the element distribution
   real, allocatable :: W(:,:), &                       ! Vectors for the words in the sentence
                        Context(:,:)                    ! Context information per word in sentence
   integer, allocatable :: loc(:)                       ! Memory location of each word (0 = none)
   logical, allocatable :: Ignore(:)                    ! Per-word flag filled in by Parse_Sentence
   integer :: i, N, Words_Learned, endpoint, line

   !================BEGIN SIMULATION===================================
   call RandSeed                  ! Initialize r-num generator
   Mu    = 0.0                    ! Setting values for element distribution, Mean
   Sigma = sqrt(1.0/D)            ! and SD

   Memory = 0
   Visual = 0
   Word(:) = ' '

   ! These counters are incremented and printed below, so they must start from
   ! a defined value; previously they were used without ever being initialised.
   Words_Learned = 0
   line = 0

   ! Loads the function words to ignore; as a side effect this also leaves
   ! unit 1 connected to the training corpus 'wiki.txt'.
   call Read_Stoplist(Stoplist)

   write(*,*) 'Learning textbase...'

   sentence_loop: do
      ! NOTE(review): the literal 1 is presumably the unit number of the corpus
      ! opened in Read_Stoplist -- confirm against Read_Sentence.
      call Read_Sentence(Sentence, 1, BAD_SENT)
      line = line + 1
      if (E_FILE) exit sentence_loop

      call Count_Words(Sentence, BAD_SENT, N)
      allocate (Sentence_Word(N), W(D,N), Context(D,N), loc(N), Ignore(N))
      W = 0.0
      call Parse_Sentence(Sentence, Sentence_Word, Stoplist, Ignore, N)

      do i = 1, N
         call Does_Word_Exist(Sentence_Word(i), Word, loc(i), endpoint)
         if (loc(i) /= 0) then
            ! If the word has been seen, use the same form as before.
            W(:,i) = Visual(:,loc(i))
         else
            ! Otherwise, create a new form ...
            W(:,i) = Random_Vector(D, Mu, Sigma)
            ! ... and add the new word to the lexicon while there is room.
            ! When the lexicon is full, loc(i) stays 0: the word still lends
            ! its random vector to its neighbours' context, but it owns no
            ! memory slot and is skipped in the memory update below.
            if (Words_Learned < MAX_WORDS) then
               loc(i) = endpoint
               call Create_New_Word(Sentence_Word(i), Word, W(:,i), Visual, loc(i))
               Words_Learned = Words_Learned + 1
            end if
         end if
      end do

      ! Coding context info for each word in the sentence:
      call Compute_Context(Context, W, N, loc, Ignore)

      ! Update memory (only for words that actually have a slot in the lexicon):
      do i = 1, N
         if (loc(i) /= 0) then
            Memory(:,loc(i)) = Memory(:,loc(i)) + Context(:,i)
         end if
      end do

      deallocate(Sentence_Word, W, Context, loc, Ignore)
      write(*,*) 'Learned line ', line
   end do sentence_loop

   write(*,*) 'Processing Complete'
   write(*,*) 'Words Learned: ', Words_Learned

   call Store_Matrix (Memory, Word, Words_Learned)   ! Save this memory to be queried later
   close(1)                                          ! Closing input wiki corpus
   !================END SIMULATION====================================

contains
   ! Functions and subroutines follow:

   !*****************************************************************
   ! Reads STOP_WORDS entries from the file 'stoplist' into Stoplist using
   ! list-directed input, then re-opens unit 1 on the training corpus
   ! 'wiki.txt' and leaves it open for the caller.
   subroutine Read_Stoplist(Stoplist)
      character(len=WORD_CHARS), intent(out) :: Stoplist(STOP_WORDS)
      integer :: i

      ! Loading stop-list words:
      open(unit=1, file='stoplist', status='old')
      do i = 1, STOP_WORDS
         read(1,*) Stoplist(i)
      end do
      close(1)

      ! Initializing training corpus (same unit number, closed by the main program):
      open(unit=1, file='wiki.txt', status='old')
   end subroutine Read_Stoplist
   !*****************************************************************

   !****************************************************************
   ! Builds the context vector of every word in a sentence.
   !   Context(:,pos) = sum of W(:,i) over all i /= pos with Ignore(i) false,
   !   and is left at zero when Ignore(pos) is true.
   ! Arguments:
   !   Context - (out) D x N context vectors
   !   W       - (in)  D x N word vectors of the sentence
   !   N       - (in)  number of words in the sentence
   !   loc     - (in)  accepted for interface compatibility; not used here
   !   Ignore  - (in)  words flagged true neither receive nor contribute context
   subroutine Compute_Context(Context, W, N, loc, Ignore)
      integer, intent(in)  :: N
      real,    intent(out) :: Context(D,N)
      real,    intent(in)  :: W(D,N)
      integer, intent(in)  :: loc(N)
      logical, intent(in)  :: Ignore(N)
      integer :: i, pos

      Context = 0.0
      do pos = 1, N
         ! An ignored word gets no context of its own.
         if (Ignore(pos)) cycle
         do i = 1, N
            ! A word's context is the sum of the other non-ignored words in
            ! the sentence, so skip the word itself and any ignored word.
            if (i == pos .or. Ignore(i)) cycle
            Context(:,pos) = Context(:,pos) + W(:,i)
         end do
      end do
   end subroutine Compute_Context
   !****************************************************************

   !****************************************************************
   ! Saves the learned lexicon.
   !   'word_labels.txt' (formatted):   the word count on the first line,
   !                                    then one label per line in (a20) format.
   !   'matrix.mat'      (unformatted): one record per word holding its
   !                                    memory vector Memory(:,i).
   ! Both files are replaced if they already exist.
   subroutine Store_Matrix(Memory, Word, Words_Learned)
      real,                      intent(in) :: Memory(D,MAX_WORDS)
      character(len=WORD_CHARS), intent(in) :: Word(MAX_WORDS)
      integer,                   intent(in) :: Words_Learned
      integer :: i

      open(unit=10, file='word_labels.txt', status='replace')
      open(unit=11, file='matrix.mat', status='replace', form='unformatted')

      write(10, *) Words_Learned
      do i = 1, Words_Learned
         write(10,'(a20)') Word(i)     ! writing word label
         write(11) Memory(:,i)         ! and corresponding memory vector
      end do

      close(10)
      close(11)
   end subroutine Store_Matrix
   !****************************************************************

end program Semantics