//===-- lib/Parser/token-sequence.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
|
||
|
|
||
|
#include "token-sequence.h"
|
||
|
#include "prescan.h"
|
||
|
#include "flang/Parser/characters.h"
|
||
|
#include "flang/Parser/message.h"
|
||
|
#include "llvm/Support/raw_ostream.h"
|
||
|
|
||
|
namespace Fortran::parser {
|
||
|
|
||
|
// Move assignment.  This sequence is emptied first and then exchanged with
// `that`, so the operand ends up holding the cleared state rather than this
// sequence's previous contents.
TokenSequence &TokenSequence::operator=(TokenSequence &&that) {
  this->clear();
  this->swap(that);
  return *this;
}
|
||
|
|
||
|
// Empties the token start table, the character buffer, and the provenance
// mappings, and resets the position at which the next token would begin.
void TokenSequence::clear() {
  char_.clear();
  start_.clear();
  provenances_.clear();
  nextStart_ = 0;
}
|
||
|
|
||
|
// Removes the most recently recorded token, along with its characters and
// the matching number of bytes of provenance.  CHECKs that there is a token
// to remove and that it is not empty.
void TokenSequence::pop_back() {
  CHECK(!start_.empty());
  const std::size_t lastTokenStart{start_.back()};
  CHECK(nextStart_ > lastTokenStart);
  const std::size_t removedBytes{nextStart_ - lastTokenStart};
  start_.pop_back();
  // The next token now begins where the removed one used to.
  nextStart_ = lastTokenStart;
  char_.resize(lastTokenStart);
  provenances_.RemoveLastBytes(removedBytes);
}
|
||
|
|
||
|
// Forwards a shrink_to_fit() request to each of the three underlying
// containers (characters, token starts, provenance mappings).
void TokenSequence::shrink_to_fit() {
  char_.shrink_to_fit();
  start_.shrink_to_fit();
  provenances_.shrink_to_fit();
}
|
||
|
|
||
|
// Exchanges the complete state of this sequence with that of `that`:
// characters, token starts, next-token position, and provenance mappings.
void TokenSequence::swap(TokenSequence &that) {
  char_.swap(that.char_);
  start_.swap(that.start_);
  provenances_.swap(that.provenances_);
  std::swap(nextStart_, that.nextStart_);
}
|
||
|
|
||
|
std::size_t TokenSequence::SkipBlanks(std::size_t at) const {
|
||
|
std::size_t tokens{start_.size()};
|
||
|
for (; at < tokens; ++at) {
|
||
|
if (!TokenAt(at).IsBlank()) {
|
||
|
return at;
|
||
|
}
|
||
|
}
|
||
|
return tokens; // even if at > tokens
|
||
|
}
|
||
|
|
||
|
// C-style /*comments*/ are removed from preprocessing directive
|
||
|
// token sequences by the prescanner, but not C++ or Fortran
|
||
|
// free-form line-ending comments (//... and !...) because
|
||
|
// ignoring them is directive-specific.
|
||
|
bool TokenSequence::IsAnythingLeft(std::size_t at) const {
|
||
|
std::size_t tokens{start_.size()};
|
||
|
for (; at < tokens; ++at) {
|
||
|
auto tok{TokenAt(at)};
|
||
|
const char *end{tok.end()};
|
||
|
for (const char *p{tok.begin()}; p < end; ++p) {
|
||
|
switch (*p) {
|
||
|
case '/':
|
||
|
return p + 1 >= end || p[1] != '/';
|
||
|
case '!':
|
||
|
return false;
|
||
|
case ' ':
|
||
|
break;
|
||
|
default:
|
||
|
return true;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
void TokenSequence::Put(const TokenSequence &that) {
|
||
|
if (nextStart_ < char_.size()) {
|
||
|
start_.push_back(nextStart_);
|
||
|
}
|
||
|
int offset = char_.size();
|
||
|
for (int st : that.start_) {
|
||
|
start_.push_back(st + offset);
|
||
|
}
|
||
|
char_.insert(char_.end(), that.char_.begin(), that.char_.end());
|
||
|
nextStart_ = char_.size();
|
||
|
provenances_.Put(that.provenances_);
|
||
|
}
|
||
|
|
||
|
void TokenSequence::Put(const TokenSequence &that, ProvenanceRange range) {
|
||
|
std::size_t offset{0};
|
||
|
std::size_t tokens{that.SizeInTokens()};
|
||
|
for (std::size_t j{0}; j < tokens; ++j) {
|
||
|
CharBlock tok{that.TokenAt(j)};
|
||
|
Put(tok, range.OffsetMember(offset));
|
||
|
offset += tok.size();
|
||
|
}
|
||
|
CHECK(offset == range.size());
|
||
|
}
|
||
|
|
||
|
void TokenSequence::Put(
|
||
|
const TokenSequence &that, std::size_t at, std::size_t tokens) {
|
||
|
ProvenanceRange provenance;
|
||
|
std::size_t offset{0};
|
||
|
for (; tokens-- > 0; ++at) {
|
||
|
CharBlock tok{that.TokenAt(at)};
|
||
|
std::size_t tokBytes{tok.size()};
|
||
|
for (std::size_t j{0}; j < tokBytes; ++j) {
|
||
|
if (offset == provenance.size()) {
|
||
|
provenance = that.provenances_.Map(that.start_[at] + j);
|
||
|
offset = 0;
|
||
|
}
|
||
|
PutNextTokenChar(tok[j], provenance.OffsetMember(offset++));
|
||
|
}
|
||
|
CloseToken();
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void TokenSequence::Put(
|
||
|
const char *s, std::size_t bytes, Provenance provenance) {
|
||
|
for (std::size_t j{0}; j < bytes; ++j) {
|
||
|
PutNextTokenChar(s[j], provenance + j);
|
||
|
}
|
||
|
CloseToken();
|
||
|
}
|
||
|
|
||
|
void TokenSequence::Put(const CharBlock &t, Provenance provenance) {
|
||
|
Put(&t[0], t.size(), provenance);
|
||
|
}
|
||
|
|
||
|
// Appends the contents of `s` as one token whose first character has the
// given provenance, by forwarding to the pointer-and-length overload.
void TokenSequence::Put(const std::string &s, Provenance provenance) {
  const char *const text{s.data()};
  const std::size_t length{s.size()};
  Put(text, length, provenance);
}
|
||
|
|
||
|
// Appends the string held by the stream `ss` as one token, by forwarding
// the result of ss.str() to the std::string overload.
void TokenSequence::Put(llvm::raw_string_ostream &ss, Provenance provenance) {
  const std::string &text{ss.str()};
  Put(text, provenance);
}
|
||
|
|
||
|
// Converts the letters of each token to lower case in place, except where
// case must be preserved: the body of a character literal (with or without
// a kind prefix) and everything after the 'h' of a Hollerith constant.
// Returns *this.
TokenSequence &TokenSequence::ToLowerCase() {
  const std::size_t tokenCount{start_.size()};
  const std::size_t charCount{char_.size()};
  // Lower-cases every character in [from, to).
  const auto lowerCaseRange{[](char *from, const char *to) {
    for (; from < to; ++from) {
      *from = ToLowerCaseLetter(*from);
    }
  }};
  std::size_t tokenIndex{0};
  std::size_t begin{0};
  while (begin < charCount) {
    // The current token ends where the next one starts, or at the end of
    // the buffer for the last token.
    std::size_t end{charCount};
    if (tokenIndex + 1 < tokenCount) {
      end = start_[++tokenIndex];
    }
    char *cursor{&char_[begin]};
    const char *const tokenEnd{char_.data() + end};
    const char *tail{tokenEnd - 1};
    begin = end;
    // Step over leading blanks, but never past the token's last character.
    while (cursor < tokenEnd - 1 && *cursor == ' ') {
      ++cursor;
    }
    // Back up over trailing blanks to find the token's closing character.
    while (tail > cursor + 1 && *tail == ' ') {
      --tail;
    }
    if (IsDecimalDigit(*cursor)) {
      while (cursor < tokenEnd && IsDecimalDigit(*cursor)) {
        ++cursor;
      }
      if (cursor < tokenEnd) {
        if (*cursor == 'h' || *cursor == 'H') {
          // Hollerith: normalize the 'h' only; the payload keeps its case.
          *cursor = 'h';
        } else if (*cursor != '_') {
          // Exponent letter (or any other suffix): lower-case the rest.
          lowerCaseRange(cursor, tokenEnd);
        }
        // Otherwise this is a kind-prefixed character literal
        // (e.g., 1_"ABC"), which is left untouched.
      }
      // An all-digit token needs no conversion.
    } else if (*tail == '\'' || *tail == '"') {
      const char quote{*tail};
      if (*cursor == quote) {
        // Character literal without a prefix: left untouched.
      } else if (cursor[1] == quote) {
        // BOZX-prefixed constant: lower-case the whole token.
        lowerCaseRange(cursor, tokenEnd);
      } else {
        // Literal with a kind-param prefix name (e.g., K_"ABC"): lower-case
        // only the prefix, stopping at the opening quote.
        for (; *cursor != quote; ++cursor) {
          *cursor = ToLowerCaseLetter(*cursor);
        }
      }
    } else {
      lowerCaseRange(cursor, tokenEnd);
    }
  }
  return *this;
}
|
||
|
|
||
|
bool TokenSequence::HasBlanks(std::size_t firstChar) const {
|
||
|
std::size_t tokens{SizeInTokens()};
|
||
|
for (std::size_t j{0}; j < tokens; ++j) {
|
||
|
if (start_[j] >= firstChar && TokenAt(j).IsBlank()) {
|
||
|
return true;
|
||
|
}
|
||
|
}
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
bool TokenSequence::HasRedundantBlanks(std::size_t firstChar) const {
|
||
|
std::size_t tokens{SizeInTokens()};
|
||
|
bool lastWasBlank{false};
|
||
|
for (std::size_t j{0}; j < tokens; ++j) {
|
||
|
bool isBlank{TokenAt(j).IsBlank()};
|
||
|
if (isBlank && lastWasBlank && start_[j] >= firstChar) {
|
||
|
return true;
|
||
|
}
|
||
|
lastWasBlank = isBlank;
|
||
|
}
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
// Rebuilds this sequence without the blank tokens that start at or after
// character offset `firstChar`; every other token is copied over in order.
// Returns *this.
TokenSequence &TokenSequence::RemoveBlanks(std::size_t firstChar) {
  const std::size_t tokenCount{SizeInTokens()};
  TokenSequence kept;
  for (std::size_t index{0}; index < tokenCount; ++index) {
    const bool removable{
        TokenAt(index).IsBlank() && start_[index] >= firstChar};
    if (!removable) {
      kept.Put(*this, index);
    }
  }
  swap(kept);
  return *this;
}
|
||
|
|
||
|
// Rebuilds this sequence without any blank token that directly follows
// another blank token and starts at or after character offset `firstChar`.
// Adjacency is judged on the original sequence, so each run of blanks in
// the affected region is reduced to its first token.  Returns *this.
TokenSequence &TokenSequence::RemoveRedundantBlanks(std::size_t firstChar) {
  const std::size_t tokenCount{SizeInTokens()};
  TokenSequence kept;
  bool previousBlank{false};
  for (std::size_t index{0}; index < tokenCount; ++index) {
    const bool currentBlank{TokenAt(index).IsBlank()};
    const bool redundant{
        currentBlank && previousBlank && start_[index] >= firstChar};
    if (!redundant) {
      kept.Put(*this, index);
    }
    previousBlank = currentBlank;
  }
  swap(kept);
  return *this;
}
|
||
|
|
||
|
// Truncates this sequence at the first token whose first non-blank
// character is '!', unless that '!' begins a compiler directive sentinel
// accepted by the prescanner.  When `skipFirst` is true, the first '!' that
// is not a sentinel is passed over as well.  Returns *this.
TokenSequence &TokenSequence::ClipComment(
    const Prescanner &prescanner, bool skipFirst) {
  const std::size_t tokenCount{SizeInTokens()};
  for (std::size_t index{0}; index < tokenCount; ++index) {
    CharBlock candidate{TokenAt(index)};
    const std::size_t leading{candidate.CountLeadingBlanks()};
    if (leading >= candidate.size() || candidate[leading] != '!') {
      continue;
    }
    // Retain active compiler directive sentinels (e.g. "!dir$"): extend the
    // candidate over contiguous following tokens until it spans the '!'
    // plus four more characters.
    const std::size_t sentinelSpan{leading + 5};
    for (std::size_t next{index + 1};
         next < tokenCount && candidate.size() < sentinelSpan; ++next) {
      if (candidate.begin() + candidate.size() != TokenAt(next).begin()) {
        break; // not adjacent in memory
      }
      candidate.ExtendToCover(TokenAt(next));
    }
    bool keep{false};
    if (candidate.size() == sentinelSpan) {
      // Ask the prescanner about the four characters after the '!',
      // lower-cased.
      char lowered[4];
      for (int k{0}; k < 4; ++k) {
        lowered[k] = ToLowerCaseLetter(candidate[leading + k + 1]);
      }
      keep = prescanner.IsCompilerDirectiveSentinel(lowered, 4);
    }
    if (keep) {
      continue;
    }
    if (skipFirst) {
      skipFirst = false;
      continue;
    }
    TokenSequence clipped;
    if (index > 0) {
      // NOTE(review): this keeps index - 1 tokens, so the token just before
      // the comment is dropped too -- presumably a blank; confirm.
      clipped.Put(*this, 0, index - 1);
    }
    swap(clipped);
    return *this;
  }
  return *this;
}
|
||
|
|
||
|
// Passes the character buffer and then the provenance mappings to `cooked`.
// Nothing is emitted when the sequence holds no characters.
void TokenSequence::Emit(CookedSource &cooked) const {
  const auto byteCount{char_.size()};
  if (byteCount == 0) {
    return;
  }
  cooked.Put(char_.data(), byteCount);
  cooked.PutProvenanceMappings(provenances_);
}
|
||
|
|
||
|
// Writes a debugging listing to `o`: a header line with the character count
// and nextStart_, then one line per token giving its index, its starting
// offset, and its text.  Returns `o`.
llvm::raw_ostream &TokenSequence::Dump(llvm::raw_ostream &o) const {
  o << "TokenSequence has " << char_.size() << " chars; nextStart_ ";
  o << nextStart_ << '\n';
  const std::size_t tokenCount{start_.size()};
  for (std::size_t index{0}; index < tokenCount; ++index) {
    o << '[' << index << "] @ " << start_[index];
    o << " '" << TokenAt(index).ToString() << "'\n";
  }
  return o;
}
|
||
|
|
||
|
// Returns the start of the provenance range that the character at `offset`
// in the character buffer maps to.
Provenance TokenSequence::GetCharProvenance(std::size_t offset) const {
  ProvenanceRange mapped{provenances_.Map(offset)};
  return mapped.start();
}
|
||
|
|
||
|
// Returns the provenance of the character `offset` bytes into token number
// `token`.
Provenance TokenSequence::GetTokenProvenance(
    std::size_t token, std::size_t offset) const {
  const std::size_t charOffset{start_[token] + offset};
  return GetCharProvenance(charOffset);
}
|
||
|
|
||
|
// Returns the provenance range mapped from the character `offset` bytes
// into token number `token`, cut down to a prefix no longer than the rest
// of that token.
ProvenanceRange TokenSequence::GetTokenProvenanceRange(
    std::size_t token, std::size_t offset) const {
  const std::size_t charOffset{start_[token] + offset};
  ProvenanceRange mapped{provenances_.Map(charOffset)};
  const std::size_t bytesLeft{TokenBytes(token) - offset};
  return mapped.Prefix(bytesLeft);
}
|
||
|
|
||
|
// Builds a provenance range for up to `tokens` consecutive tokens beginning
// with token number `token`.  It starts from the range mapped for the first
// token and annexes the range of each following token for as long as
// AnnexIfPredecessor() succeeds.  Returns a default-constructed range when
// `tokens` is zero.
ProvenanceRange TokenSequence::GetIntervalProvenanceRange(
    std::size_t token, std::size_t tokens) const {
  if (tokens == 0) {
    return {};
  }
  ProvenanceRange combined{provenances_.Map(start_[token])};
  for (std::size_t remaining{tokens - 1}; remaining > 0; --remaining) {
    ++token;
    if (!combined.AnnexIfPredecessor(provenances_.Map(start_[token]))) {
      break; // stop at the first range that could not be annexed
    }
  }
  return combined;
}
|
||
|
|
||
|
// Returns the interval provenance range computed over all tokens of the
// sequence, starting from the first.
ProvenanceRange TokenSequence::GetProvenanceRange() const {
  const std::size_t tokenCount{start_.size()};
  return GetIntervalProvenanceRange(0, tokenCount);
}
|
||
|
|
||
|
// Emits an error message for each token whose first non-blank character is
// not a valid Fortran token character.  A '!' is tolerated when the
// prescanner accepts this token, or the one after it, as a compiler
// directive sentinel.  Returns *this.
const TokenSequence &TokenSequence::CheckBadFortranCharacters(
    Messages &messages, const Prescanner &prescanner) const {
  const std::size_t tokenCount{SizeInTokens()};
  for (std::size_t index{0}; index < tokenCount; ++index) {
    CharBlock token{TokenAt(index)};
    char ch{token.FirstNonBlank()};
    if (ch == ' ' || IsValidFortranTokenCharacter(ch)) {
      continue;
    }
    if (ch == '!') {
      if (prescanner.IsCompilerDirectiveSentinel(token)) {
        continue;
      }
      if (index + 1 < tokenCount &&
          prescanner.IsCompilerDirectiveSentinel(
              TokenAt(index + 1))) { // !dir$, &c.
        ++index; // the sentinel token itself is skipped too
        continue;
      }
    }
    const bool printable{ch >= ' ' && ch < '\x7f'};
    if (printable) {
      messages.Say(GetTokenProvenanceRange(index),
          "bad character ('%c') in Fortran token"_err_en_US, ch);
    } else {
      // Control and non-ASCII characters are reported by hex value.
      messages.Say(GetTokenProvenanceRange(index),
          "bad character (0x%02x) in Fortran token"_err_en_US, ch & 0xff);
    }
  }
  return *this;
}
|
||
|
|
||
|
// Checks that the '(' and ')' tokens of the sequence are balanced and emits
// one error message, for the first unmatched ')' or else the last unmatched
// '(', when they are not.  Returns *this.
const TokenSequence &TokenSequence::CheckBadParentheses(
    Messages &messages) const {
  const std::size_t tokenCount{SizeInTokens()};
  // First, a quick pass with no allocation for the common case.
  int depth{0};
  for (std::size_t index{0}; index < tokenCount; ++index) {
    switch (TokenAt(index).OnlyNonBlank()) {
    case '(':
      ++depth;
      break;
    case ')':
      --depth;
      break;
    default:
      break;
    }
    if (depth < 0) {
      break; // a ')' with no matching '(' ends the scan early
    }
  }
  if (depth == 0) {
    return *this;
  }
  // There's an error; rescan with a stack of open parentheses to locate it.
  std::vector<std::size_t> openParens;
  for (std::size_t index{0}; index < tokenCount; ++index) {
    CharBlock token{TokenAt(index)};
    char ch{token.OnlyNonBlank()};
    if (ch == '(') {
      openParens.push_back(index);
      continue;
    }
    if (ch != ')') {
      continue;
    }
    if (openParens.empty()) {
      messages.Say(GetTokenProvenanceRange(index), "Unmatched ')'"_err_en_US);
      return *this;
    }
    openParens.pop_back();
  }
  CHECK(!openParens.empty());
  messages.Say(
      GetTokenProvenanceRange(openParens.back()), "Unmatched '('"_err_en_US);
  return *this;
}
|
||
|
} // namespace Fortran::parser
|