//===-- lib/Parser/token-sequence.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "token-sequence.h"
#include "flang/Parser/characters.h"
#include "flang/Parser/message.h"
#include "llvm/Support/raw_ostream.h"

namespace Fortran::parser {

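// Move assignment: clear this sequence, then take over the contents of
// "that" via swap, leaving "that" empty.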
TokenSequence &TokenSequence::operator=(TokenSequence &&that) {
  clear();
  swap(that);
  return *this;
}

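// Resets the sequence to empty without releasing its storage.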
void TokenSequence::clear() {
  start_.clear();
  nextStart_ = 0;
  char_.clear();
  provenances_.clear();
}

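// Removes the last token, truncating the character buffer and dropping
// the corresponding bytes from the provenance map.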
void TokenSequence::pop_back() {
  std::size_t bytes{nextStart_ - start_.back()};
  nextStart_ = start_.back();
  start_.pop_back();
  char_.resize(nextStart_);
  provenances_.RemoveLastBytes(bytes);
}

void TokenSequence::shrink_to_fit() {
  start_.shrink_to_fit();
  char_.shrink_to_fit();
  provenances_.shrink_to_fit();
}

void TokenSequence::swap(TokenSequence &that) {
  start_.swap(that.start_);
  std::swap(nextStart_, that.nextStart_);
  char_.swap(that.char_);
  provenances_.swap(that.provenances_);
}

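// Returns the index of the first non-blank token at or after "at",
// or the total token count if every remaining token is blank.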
std::size_t TokenSequence::SkipBlanks(std::size_t at) const {
  std::size_t tokens{start_.size()};
  for (; at < tokens; ++at) {
    if (!TokenAt(at).IsBlank()) {
      return at;
    }
  }
  return tokens; // even if at > tokens
}

// C-style /*comments*/ are removed from preprocessing directive
// token sequences by the prescanner, but not C++ or Fortran
// free-form line-ending comments (//... and !...) because
// ignoring them is directive-specific.
bool TokenSequence::IsAnythingLeft(std::size_t at) const {
  std::size_t tokens{start_.size()};
  for (; at < tokens; ++at) {
    auto tok{TokenAt(at)};
    const char *end{tok.end()};
    for (const char *p{tok.begin()}; p < end; ++p) {
      switch (*p) {
      case '/':
        return p + 1 >= end || p[1] != '/';
      case '!':
        return false;
      case ' ':
        break;
      default:
        return true;
      }
    }
  }
  return false;
}

void TokenSequence::RemoveLastToken() {
  CHECK(!start_.empty());
  CHECK(nextStart_ > start_.back());
  std::size_t bytes{nextStart_ - start_.back()};
  nextStart_ = start_.back();
  start_.pop_back();
  char_.erase(char_.begin() + nextStart_, char_.end());
  provenances_.RemoveLastBytes(bytes);
}

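// Appends the contents of another sequence.  Any token still under
// construction here is closed first, and the appended token start offsets
// are rebased onto this sequence's character buffer.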
void TokenSequence::Put(const TokenSequence &that) {
  if (nextStart_ < char_.size()) {
    start_.push_back(nextStart_);
  }
  std::size_t offset{char_.size()};
  for (std::size_t st : that.start_) {
    start_.push_back(st + offset);
  }
  char_.insert(char_.end(), that.char_.begin(), that.char_.end());
  nextStart_ = char_.size();
  provenances_.Put(that.provenances_);
}

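// Appends the tokens of another sequence, attributing their characters to
// consecutive offsets of a single contiguous provenance range rather than
// to their original provenances.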
void TokenSequence::Put(const TokenSequence &that, ProvenanceRange range) {
  std::size_t offset{0};
  std::size_t tokens{that.SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock tok{that.TokenAt(j)};
    Put(tok, range.OffsetMember(offset));
    offset += tok.size();
  }
  CHECK(offset == range.size());
}

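// Appends "tokens" tokens from "that", starting at token index "at",
// while preserving each character's original provenance.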
void TokenSequence::Put(
    const TokenSequence &that, std::size_t at, std::size_t tokens) {
  ProvenanceRange provenance;
  std::size_t offset{0};
  for (; tokens-- > 0; ++at) {
    CharBlock tok{that.TokenAt(at)};
    std::size_t tokBytes{tok.size()};
    for (std::size_t j{0}; j < tokBytes; ++j) {
      if (offset == provenance.size()) {
        provenance = that.provenances_.Map(that.start_[at] + j);
        offset = 0;
      }
      PutNextTokenChar(tok[j], provenance.OffsetMember(offset++));
    }
    CloseToken();
  }
}

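// Appends raw characters as one new token whose provenances are assigned
// consecutively starting at "provenance"; the overloads below forward to
// this one.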
void TokenSequence::Put(
    const char *s, std::size_t bytes, Provenance provenance) {
  for (std::size_t j{0}; j < bytes; ++j) {
    PutNextTokenChar(s[j], provenance + j);
  }
  CloseToken();
}

void TokenSequence::Put(const CharBlock &t, Provenance provenance) {
  Put(&t[0], t.size(), provenance);
}

void TokenSequence::Put(const std::string &s, Provenance provenance) {
  Put(s.data(), s.size(), provenance);
}

void TokenSequence::Put(llvm::raw_string_ostream &ss, Provenance provenance) {
  Put(ss.str(), provenance);
}

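// Lowercases the sequence in place without disturbing the bodies of
// quoted character literals: Hollerith count markers are normalized to
// 'h', a kind-param prefix is lowercased up to its opening quote, and
// other tokens (names, keywords, exponents, BOZ constants) are lowercased
// entirely.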
TokenSequence &TokenSequence::ToLowerCase() {
  std::size_t tokens{start_.size()};
  std::size_t chars{char_.size()};
  std::size_t atToken{0};
  for (std::size_t j{0}; j < chars;) {
    std::size_t nextStart{atToken + 1 < tokens ? start_[++atToken] : chars};
    char *p{&char_[j]};
    char const *limit{char_.data() + nextStart};
    j = nextStart;
    if (IsDecimalDigit(*p)) {
      while (p < limit && IsDecimalDigit(*p)) {
        ++p;
      }
      if (p >= limit) {
      } else if (*p == 'h' || *p == 'H') {
        // Hollerith
        *p = 'h';
      } else if (*p == '_') {
        // kind-prefixed character literal (e.g., 1_"ABC")
      } else {
        // exponent
        for (; p < limit; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      }
    } else if (limit[-1] == '\'' || limit[-1] == '"') {
      if (*p == limit[-1]) {
        // Character literal without prefix
      } else if (p[1] == limit[-1]) {
        // BOZX-prefixed constant
        for (; p < limit; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      } else {
        // Literal with kind-param prefix name (e.g., K_"ABC").
        for (; *p != limit[-1]; ++p) {
          *p = ToLowerCaseLetter(*p);
        }
      }
    } else {
      for (; p < limit; ++p) {
        *p = ToLowerCaseLetter(*p);
      }
    }
  }
  return *this;
}

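// Blank-token queries; "firstChar" restricts them to tokens whose
// characters start at or after that offset in the buffer.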
bool TokenSequence::HasBlanks(std::size_t firstChar) const {
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    if (start_[j] >= firstChar && TokenAt(j).IsBlank()) {
      return true;
    }
  }
  return false;
}

bool TokenSequence::HasRedundantBlanks(std::size_t firstChar) const {
  std::size_t tokens{SizeInTokens()};
  bool lastWasBlank{false};
  for (std::size_t j{0}; j < tokens; ++j) {
    bool isBlank{TokenAt(j).IsBlank()};
    if (isBlank && lastWasBlank && start_[j] >= firstChar) {
      return true;
    }
    lastWasBlank = isBlank;
  }
  return false;
}

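// Rebuild the sequence with blank tokens removed (RemoveBlanks) or with
// runs of consecutive blanks collapsed to one (RemoveRedundantBlanks),
// again subject to the "firstChar" offset threshold.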
TokenSequence &TokenSequence::RemoveBlanks(std::size_t firstChar) {
  std::size_t tokens{SizeInTokens()};
  TokenSequence result;
  for (std::size_t j{0}; j < tokens; ++j) {
    if (!TokenAt(j).IsBlank() || start_[j] < firstChar) {
      result.Put(*this, j);
    }
  }
  swap(result);
  return *this;
}

TokenSequence &TokenSequence::RemoveRedundantBlanks(std::size_t firstChar) {
  std::size_t tokens{SizeInTokens()};
  TokenSequence result;
  bool lastWasBlank{false};
  for (std::size_t j{0}; j < tokens; ++j) {
    bool isBlank{TokenAt(j).IsBlank()};
    if (!isBlank || !lastWasBlank || start_[j] < firstChar) {
      result.Put(*this, j);
    }
    lastWasBlank = isBlank;
  }
  swap(result);
  return *this;
}

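// Truncates the sequence at the first token that begins a '!' comment;
// when "skipFirst" is set, the first such token is passed over (e.g., to
// preserve a directive sentinel).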
TokenSequence &TokenSequence::ClipComment(bool skipFirst) {
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    if (TokenAt(j).FirstNonBlank() == '!') {
      if (skipFirst) {
        skipFirst = false;
      } else {
        TokenSequence result;
        if (j > 0) {
          result.Put(*this, 0, j - 1);
        }
        swap(result);
        return *this;
      }
    }
  }
  return *this;
}

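// Copies the characters and their provenance mappings into the cooked
// character stream consumed by the parser proper.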
void TokenSequence::Emit(CookedSource &cooked) const {
  cooked.Put(&char_[0], char_.size());
  cooked.PutProvenanceMappings(provenances_);
}

void TokenSequence::Dump(llvm::raw_ostream &o) const {
  o << "TokenSequence has " << char_.size() << " chars; nextStart_ "
    << nextStart_ << '\n';
  for (std::size_t j{0}; j < start_.size(); ++j) {
    o << '[' << j << "] @ " << start_[j] << " '" << TokenAt(j).ToString()
      << "'\n";
  }
}

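// Map a position within a token back to the source provenance recorded
// for its characters.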
Provenance TokenSequence::GetTokenProvenance(
    std::size_t token, std::size_t offset) const {
  ProvenanceRange range{provenances_.Map(start_[token] + offset)};
  return range.start();
}

ProvenanceRange TokenSequence::GetTokenProvenanceRange(
    std::size_t token, std::size_t offset) const {
  ProvenanceRange range{provenances_.Map(start_[token] + offset)};
  return range.Prefix(TokenBytes(token) - offset);
}

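// Returns a provenance range covering a run of tokens, growing the range
// only while each successive token's provenance immediately follows its
// predecessor's.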
ProvenanceRange TokenSequence::GetIntervalProvenanceRange(
    std::size_t token, std::size_t tokens) const {
  if (tokens == 0) {
    return {};
  }
  ProvenanceRange range{provenances_.Map(start_[token])};
  while (--tokens > 0 &&
      range.AnnexIfPredecessor(provenances_.Map(start_[++token]))) {
  }
  return range;
}

ProvenanceRange TokenSequence::GetProvenanceRange() const {
  return GetIntervalProvenanceRange(0, start_.size());
}

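// Reports an error for any token whose first non-blank character could
// not begin a Fortran token; a leading '!' on the first token is
// tolerated so that directive sentinels like !dir$ survive.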
const TokenSequence &TokenSequence::CheckBadFortranCharacters(
    Messages &messages) const {
  std::size_t tokens{SizeInTokens()};
  for (std::size_t j{0}; j < tokens; ++j) {
    CharBlock token{TokenAt(j)};
    char ch{token.FirstNonBlank()};
    if (ch != ' ' && !IsValidFortranTokenCharacter(ch)) {
      if (ch == '!' && j == 0) {
        // allow in !dir$
      } else if (ch < ' ' || ch >= '\x7f') {
        messages.Say(GetTokenProvenanceRange(j),
            "bad character (0x%02x) in Fortran token"_err_en_US, ch & 0xff);
      } else {
        messages.Say(GetTokenProvenanceRange(j),
            "bad character ('%c') in Fortran token"_err_en_US, ch);
      }
    }
  }
  return *this;
}
} // namespace Fortran::parser